-
-
Notifications
You must be signed in to change notification settings - Fork 718
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP Subscribe events - PubSub Extension #5218
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
|
||
from .core import CommClosedError | ||
from .metrics import time | ||
from .protocol.serialize import to_serialize | ||
from .protocol.serialize import Serialized, deserialize # , to_serialize | ||
from .utils import TimeoutError, sync | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -36,13 +36,21 @@ def __init__(self, scheduler): | |
|
||
self.scheduler.extensions["pubsub"] = self | ||
|
||
def add_publisher(self, comm=None, name=None, worker=None): | ||
def add_publisher(self, comm=None, name=None, worker=None, log_queue=False): | ||
logger.debug("Add publisher: %s %s", name, worker) | ||
self.publishers[name].add(worker) | ||
|
||
if log_queue: | ||
self.scheduler.events[name] # init defaultdict | ||
assert name in self.scheduler.events | ||
|
||
return { | ||
"subscribers": {addr: {} for addr in self.subscribers[name]}, | ||
"publish-scheduler": name in self.client_subscribers | ||
and len(self.client_subscribers[name]) > 0, | ||
"publish-scheduler": ( | ||
name in self.client_subscribers | ||
and len(self.client_subscribers[name]) > 0 | ||
) | ||
or name in self.scheduler.events, | ||
} | ||
|
||
def add_subscriber(self, comm=None, name=None, worker=None, client=None): | ||
|
@@ -75,15 +83,15 @@ def remove_publisher(self, comm=None, name=None, worker=None): | |
def remove_subscriber(self, comm=None, name=None, worker=None, client=None): | ||
if worker: | ||
logger.debug("Remove worker subscriber: %s %s", name, worker) | ||
self.subscribers[name].remove(worker) | ||
self.subscribers[name].discard(worker) | ||
for pub in self.publishers[name]: | ||
self.scheduler.worker_send( | ||
pub, | ||
{"op": "pubsub-remove-subscriber", "address": worker, "name": name}, | ||
) | ||
elif client: | ||
logger.debug("Remove client subscriber: %s %s", name, client) | ||
self.client_subscribers[name].remove(client) | ||
self.client_subscribers[name].discard(client) | ||
if not self.client_subscribers[name]: | ||
del self.client_subscribers[name] | ||
for pub in self.publishers[name]: | ||
|
@@ -110,6 +118,14 @@ def handle_message(self, name=None, msg=None, worker=None, client=None): | |
except (KeyError, CommClosedError): | ||
self.remove_subscriber(name=name, client=c) | ||
|
||
if name in self.scheduler.events: | ||
# FIXME: Am I allowed to do this? Feels evil | ||
if isinstance(msg, Serialized): | ||
msg = deserialize(msg.header, msg.frames) | ||
event = (time(), msg) | ||
self.scheduler.events[name].append(event) | ||
self.scheduler.event_counts[name] += 1 | ||
|
||
if client: | ||
for sub in self.subscribers[name]: | ||
self.scheduler.worker_send( | ||
|
@@ -143,7 +159,7 @@ def add_subscriber(self, name=None, address=None, **info): | |
|
||
def remove_subscriber(self, name=None, address=None): | ||
for pub in self.publishers[name]: | ||
del pub.subscribers[address] | ||
pub.subscribers.pop(address, None) | ||
|
||
def publish_scheduler(self, name=None, publish=None): | ||
self.publish_to_scheduler[name] = publish | ||
|
@@ -247,6 +263,8 @@ class Pub: | |
client: Client (optional) | ||
Client used for communication with the scheduler. Defaults to | ||
the value of ``get_client()``. If given, ``worker`` must be ``None``. | ||
log_queue: bool | ||
If True, log the events in an event queue on the scheduler. | ||
|
||
Examples | ||
-------- | ||
|
@@ -283,7 +301,7 @@ class Pub: | |
Sub | ||
""" | ||
|
||
def __init__(self, name, worker=None, client=None): | ||
def __init__(self, name, worker=None, client=None, log_queue=False): | ||
if worker is None and client is None: | ||
from distributed import get_client, get_worker | ||
|
||
|
@@ -304,6 +322,7 @@ def __init__(self, name, worker=None, client=None): | |
self.loop = self.client.loop | ||
|
||
self.name = name | ||
self.log_queue = log_queue | ||
self._started = False | ||
self._buffer = [] | ||
|
||
|
@@ -317,7 +336,7 @@ def __init__(self, name, worker=None, client=None): | |
async def _start(self): | ||
if self.worker: | ||
result = await self.scheduler.pubsub_add_publisher( | ||
name=self.name, worker=self.worker.address | ||
name=self.name, worker=self.worker.address, log_queue=self.log_queue | ||
) | ||
pubsub = self.worker.extensions["pubsub"] | ||
self.subscribers.update(result["subscribers"]) | ||
|
@@ -334,7 +353,8 @@ def _put(self, msg): | |
self._buffer.append(msg) | ||
return | ||
|
||
data = {"op": "pubsub-msg", "name": self.name, "msg": to_serialize(msg)} | ||
# FIXME If I use to_serialize here, this breaks msgs of type dict! | ||
data = {"op": "pubsub-msg", "name": self.name, "msg": msg} | ||
Comment on lines
+356
to
+357
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This broke for simple dict-type messages by raising a KeyError on the remote side. This is probably a bug, but how to deal with serialized objects in general would be one of my questions. |
||
|
||
if self.worker: | ||
for sub in self.subscribers: | ||
|
@@ -388,12 +408,7 @@ def __init__(self, name, worker=None, client=None): | |
self.loop.add_callback(pubsub.subscribers[name].add, self) | ||
|
||
msg = {"op": "pubsub-add-subscriber", "name": self.name} | ||
if self.worker: | ||
self.loop.add_callback(self.worker.batched_stream.send, msg) | ||
elif self.client: | ||
self.loop.add_callback(self.client.scheduler_comm.send, msg) | ||
else: | ||
raise Exception() | ||
self._send_message(msg) | ||
|
||
weakref.finalize(self, pubsub.trigger_cleanup) | ||
|
||
|
@@ -461,6 +476,18 @@ async def _put(self, msg): | |
async with self.condition: | ||
self.condition.notify() | ||
|
||
def _send_message(self, msg): | ||
if self.worker: | ||
self.loop.add_callback(self.worker.batched_stream.send, msg) | ||
elif self.client: | ||
self.loop.add_callback(self.client.scheduler_comm.send, msg) | ||
else: | ||
raise Exception() | ||
|
||
def stop(self): | ||
msg = {"op": "pubsub-remove-subscriber", "name": self.name} | ||
self._send_message(msg) | ||
|
||
def __repr__(self): | ||
return f"<Sub: {self.name}>" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7399,14 +7399,11 @@ async def get_worker_logs(self, comm=None, n=None, workers=None, nanny=False): | |
return results | ||
|
||
def log_event(self, name, msg): | ||
event = (time(), msg) | ||
if isinstance(name, list): | ||
for n in name: | ||
self.events[n].append(event) | ||
self.event_counts[n] += 1 | ||
else: | ||
self.events[name].append(event) | ||
self.event_counts[name] += 1 | ||
if not isinstance(name, list): | ||
name = [name] | ||
for n in name: | ||
self.events[n] # init defaultdict | ||
self.extensions["pubsub"].handle_message(name=n, msg=msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is currently ugly, but the extension's handle_message now appends the event to the deque and increments the counter. |
||
|
||
def get_events(self, comm=None, topic=None): | ||
if topic is not None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is mostly a convenience for executing an event handler on the client side. The actual subscription is handled via the extension.