-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] dns: add support for SRV records in DNS lookup #6379
Changes from all commits
5dbe4cc
b8e7008
0401016
ca67957
369582c
7b260d0
2c6da1a
a302103
9da69fd
7c5dcdd
8f937ff
5c42041
303ccca
b0dc4da
b4e9b37
dbbdfbd
601c1f6
406eb1e
b63a81c
a8e2903
dc152d2
ea167bc
6faaddd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
#include "common/network/dns_impl.h" | ||
|
||
#include <arpa/nameser.h> | ||
#include <netdb.h> | ||
#include <netinet/in.h> | ||
#include <sys/socket.h> | ||
|
@@ -67,6 +68,30 @@ void DnsResolverImpl::initializeChannel(ares_options* options, int optmask) { | |
ares_init_options(&channel_, options, optmask | ARES_OPT_SOCK_STATE_CB); | ||
} | ||
|
||
bool DnsResolverImpl::PendingResolutionBase::fireCallback(std::list<DnsResponse>&& response) { | ||
if (completed_) { | ||
if (!cancelled_) { | ||
try { | ||
callback_(std::move(response)); | ||
} catch (const EnvoyException& e) { | ||
ENVOY_LOG(critical, "EnvoyException in c-ares callback"); | ||
dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
} catch (const std::exception& e) { | ||
ENVOY_LOG(critical, "std::exception in c-ares callback"); | ||
dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
} catch (...) { | ||
ENVOY_LOG(critical, "Unknown exception in c-ares callback"); | ||
dispatcher_.post([] { throw EnvoyException("unknown"); }); | ||
} | ||
} | ||
if (owned_) { | ||
delete this; | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
void DnsResolverImpl::PendingResolution::onAresGetAddrInfoCallback(int status, int timeouts, | ||
ares_addrinfo* addrinfo) { | ||
// We receive ARES_EDESTRUCTION when destructing with pending queries. | ||
|
@@ -120,25 +145,8 @@ void DnsResolverImpl::PendingResolution::onAresGetAddrInfoCallback(int status, i | |
ENVOY_LOG(debug, "DNS request timed out {} times", timeouts); | ||
} | ||
|
||
if (completed_) { | ||
if (!cancelled_) { | ||
try { | ||
callback_(std::move(address_list)); | ||
} catch (const EnvoyException& e) { | ||
ENVOY_LOG(critical, "EnvoyException in c-ares callback"); | ||
dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
} catch (const std::exception& e) { | ||
ENVOY_LOG(critical, "std::exception in c-ares callback"); | ||
dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
} catch (...) { | ||
ENVOY_LOG(critical, "Unknown exception in c-ares callback"); | ||
dispatcher_.post([] { throw EnvoyException("unknown"); }); | ||
} | ||
} | ||
if (owned_) { | ||
delete this; | ||
return; | ||
} | ||
if (fireCallback(std::move(address_list))) { | ||
return; | ||
} | ||
|
||
if (!completed_ && fallback_if_failed_) { | ||
|
@@ -192,6 +200,23 @@ void DnsResolverImpl::onAresSocketStateChange(int fd, int read, int write) { | |
(write ? Event::FileReadyType::Write : 0)); | ||
} | ||
|
||
// Decides what to hand back to the caller after a resolution has been started.
//
// @param pending_resolution the resolution object whose query has already been issued.
// @return nullptr if the resolution is already completed (the object is destroyed with the
//         unique_ptr); otherwise the released raw pointer, with owned_ set so the object deletes
//         itself later.
ActiveDnsQuery* DnsResolverImpl::preparePendingResolution(
    std::unique_ptr<PendingResolutionBase> pending_resolution) {
  // Resolution does not need asynchronous behavior or network events. For
  // example, localhost lookup.
  if (pending_resolution->completed_) {
    return nullptr;
  }

  // Enable timer to wake us up if the request times out.
  updateAresTimer();

  // The PendingResolutionBase will self-delete when the request completes
  // (including if cancelled or if ~DnsResolverImpl() happens).
  pending_resolution->owned_ = true;
  return pending_resolution.release();
}
|
||
ActiveDnsQuery* DnsResolverImpl::resolve(const std::string& dns_name, | ||
DnsLookupFamily dns_lookup_family, ResolveCb callback) { | ||
// TODO(hennna): Add DNS caching which will allow testing the edge case of a | ||
|
@@ -209,19 +234,7 @@ ActiveDnsQuery* DnsResolverImpl::resolve(const std::string& dns_name, | |
pending_resolution->getAddrInfo(AF_INET6); | ||
} | ||
|
||
if (pending_resolution->completed_) { | ||
// Resolution does not need asynchronous behavior or network events. For | ||
// example, localhost lookup. | ||
return nullptr; | ||
} else { | ||
// Enable timer to wake us up if the request times out. | ||
updateAresTimer(); | ||
|
||
// The PendingResolution will self-delete when the request completes | ||
// (including if cancelled or if ~DnsResolverImpl() happens). | ||
pending_resolution->owned_ = true; | ||
return pending_resolution.release(); | ||
} | ||
return preparePendingResolution(std::move(pending_resolution)); | ||
} | ||
|
||
void DnsResolverImpl::PendingResolution::getAddrInfo(int family) { | ||
|
@@ -242,5 +255,90 @@ void DnsResolverImpl::PendingResolution::getAddrInfo(int family) { | |
this); | ||
} | ||
|
||
// Starts an SRV lookup for `dns_name`.
//
// A PendingSrvResolution is created, its SRV query is issued via getSrvByName(), and the object
// is then passed to preparePendingResolution() to decide whether a handle is returned.
//
// @param dns_name the name to query SRV records for.
// @param dns_lookup_family address family preference passed on to the PendingSrvResolution.
// @param callback invoked with the resulting list of DnsResponse entries.
// @return whatever preparePendingResolution() returns for the new resolution object.
ActiveDnsQuery* DnsResolverImpl::resolveSrv(const std::string& dns_name,
                                            DnsLookupFamily dns_lookup_family, ResolveCb callback) {
  auto* raw_query =
      new PendingSrvResolution(callback, dispatcher_, channel_, dns_name, dns_lookup_family, this);
  std::unique_ptr<PendingSrvResolution> srv_query(raw_query);

  srv_query->getSrvByName();
  return preparePendingResolution(std::move(srv_query));
}
|
||
void DnsResolverImpl::PendingSrvResolution::onAresSrvStartCallback(int status, int timeouts, | ||
unsigned char* buf, int len) { | ||
// We receive ARES_EDESTRUCTION when destructing with pending queries. | ||
if (status == ARES_EDESTRUCTION) { | ||
ASSERT(owned_); | ||
delete this; | ||
return; | ||
} | ||
|
||
bool replies_parsed = false; | ||
if (status == ARES_SUCCESS) { | ||
struct ares_srv_reply* srv_reply; | ||
status = ares_parse_srv_reply(buf, len, &srv_reply); | ||
|
||
if (status == ARES_SUCCESS) { | ||
size_t total = 0; | ||
for (ares_srv_reply* current_reply = srv_reply; current_reply != NULL; | ||
current_reply = current_reply->next) { | ||
total++; | ||
} | ||
|
||
std::shared_ptr<std::atomic<ssize_t>> finished = std::make_shared<std::atomic<ssize_t>>(0); | ||
std::shared_ptr<std::list<DnsResponse>> srv_records = | ||
std::make_shared<std::list<DnsResponse>>(); | ||
std::shared_ptr<std::mutex> mutex = std::make_shared<std::mutex>(); | ||
for (ares_srv_reply* current_reply = srv_reply; current_reply != NULL; | ||
current_reply = current_reply->next) { | ||
resolver_->resolve( | ||
current_reply->host, this->dns_lookup_family_, | ||
[this, total, finished, srv_records, mutex, | ||
current_reply](const std::list<DnsResponse>&& response) { | ||
for (auto instance = response.begin(); instance != response.end(); ++instance) { | ||
Address::InstanceConstSharedPtr inst_with_port( | ||
Utility::getAddressWithPort(*instance->address_, current_reply->port)); | ||
mutex->lock(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we taking locks? Isn't everything thread local on this dispatcher? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are threads and dispatchers always coupled? I think so, but I'm not 100% sure. So any operations happening through a particular dispatcher will operate on the same thread? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the purpose of DNS resolution, this should be true (that they are coupled). |
||
srv_records->emplace_back(DnsResponse(inst_with_port, instance->ttl_)); | ||
mutex->unlock(); | ||
} | ||
if (static_cast<unsigned>(++(*finished)) == total) { | ||
onAresSrvFinishCallback(std::move(*srv_records)); | ||
} | ||
}); | ||
} | ||
replies_parsed = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need an explicit |
||
} | ||
|
||
ares_free_data(srv_reply); | ||
} | ||
|
||
if (timeouts > 0) { | ||
ENVOY_LOG(debug, "DNS request timed out {} times while querying for SRV records", timeouts); | ||
} | ||
|
||
if (!replies_parsed) { | ||
onAresSrvFinishCallback({}); | ||
} | ||
} | ||
|
||
void DnsResolverImpl::PendingSrvResolution::onAresSrvFinishCallback( | ||
std::list<DnsResponse>&& srv_records) { | ||
if (!srv_records.empty()) { | ||
completed_ = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this true regardless of the length of the srv_records? I.e. the query is over, we're not waiting any longer? |
||
} | ||
|
||
fireCallback(std::move(srv_records)); | ||
} | ||
|
||
// Issues the SRV query (class ns_c_in, type ns_t_srv) for dns_name_ on channel_. The reply is
// routed to onAresSrvStartCallback() of this object, which is passed to c-ares as the opaque
// callback argument.
void DnsResolverImpl::PendingSrvResolution::getSrvByName() {
  // Captureless trampoline: recovers the PendingSrvResolution from the opaque argument and
  // forwards the c-ares result to it.
  const auto on_srv_reply = [](void* arg, int status, int timeouts, unsigned char* abuf,
                               int alen) {
    auto* self = static_cast<PendingSrvResolution*>(arg);
    self->onAresSrvStartCallback(status, timeouts, abuf, alen);
  };

  ares_query(channel_, dns_name_.c_str(), ns_c_in, ns_t_srv, on_srv_reply, this);
}
|
||
} // namespace Network | ||
} // namespace Envoy |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know the existing API does it this way, but I'd be interested if we could make ActiveDnsQuery RAII.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you be willing to explain what you mean here more? The ActiveDnsQuery abstract class only has one method and no data members. The PendingResolution struct is derived from ActiveDnsQuery and contains data, is that what you want to be RAII? Do you know why a struct was used instead of a class? Was it just to avoid another class definition?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I was suggesting that the returned `ActiveDnsQuery` be a unique ptr. The idea is that if this returned object is then destructed, it gives you automagic cancellation of the request.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@htuch Are you saying it's better to return a `std::unique_ptr<ActiveDnsQuery>` instead of `ActiveDnsQuery*`? I'm not quite sure how it actually cancels the pending dns request. Mind elaborating a bit more? (Sorry if the answer is obvious as I'm pretty new to C++ :) )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, return `std::unique_ptr<ActiveDnsQuery>`. The destructor for `ActiveDnsQuery` can then perform the cancellation.