-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support Kafka record headers #1574
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -513,7 +513,7 @@ def _estimate_size_in_bytes(self, key, value, headers=[]): | |
return LegacyRecordBatchBuilder.estimate_size_in_bytes( | ||
magic, self.config['compression_type'], key, value) | ||
|
||
def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): | ||
def send(self, topic, value=None, key=None, headers=None, partition=None, timestamp_ms=None): | ||
"""Publish a message to a topic. | ||
|
||
Arguments: | ||
|
@@ -534,6 +534,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): | |
partition (but if key is None, partition is chosen randomly). | ||
Must be type bytes, or be serializable to bytes via configured | ||
key_serializer. | ||
headers (optional): a list of header key/value pairs. List items | ||
are tuples of str key and bytes value. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.

Why not use a simple dict?

There was a problem hiding this comment. Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.

Probably because I defined it so on the parser level. Do you happen to know if header structures support multiple keys, like HTTP headers do? I'm kind of convinced they do.

There was a problem hiding this comment. Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.

https://cwiki.apache.org/confluence/display/KAFKA/KIP-82+-+Add+Record+Headers insists on 1) "duplicate headers with the same key must be supported" as well as 2) "the order of headers must be retained throughout a record's end-to-end lifetime: from producer to consumer". I agree a dict would be a nicer interface, but it cannot easily satisfy the original KIP-82 requirements. |
||
timestamp_ms (int, optional): epoch milliseconds (from Jan 1 1970 UTC) | ||
to use as the message timestamp. Defaults to current time. | ||
|
||
|
@@ -563,13 +565,18 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): | |
partition = self._partition(topic, partition, key, value, | ||
key_bytes, value_bytes) | ||
|
||
message_size = self._estimate_size_in_bytes(key_bytes, value_bytes) | ||
if headers is None: | ||
headers = [] | ||
assert type(headers) == list | ||
assert all(type(item) == tuple and len(item) == 2 and type(item[0]) == str and type(item[1]) == bytes for item in headers) | ||
|
||
message_size = self._estimate_size_in_bytes(key_bytes, value_bytes, headers) | ||
self._ensure_valid_record_size(message_size) | ||
|
||
tp = TopicPartition(topic, partition) | ||
log.debug("Sending (key=%r value=%r) to %s", key, value, tp) | ||
log.debug("Sending (key=%r value=%r headers=%r) to %s", key, value, headers, tp) | ||
result = self._accumulator.append(tp, timestamp_ms, | ||
key_bytes, value_bytes, | ||
key_bytes, value_bytes, headers, | ||
self.config['max_block_ms'], | ||
estimated_size=message_size) | ||
future, batch_is_full, new_batch_created = result | ||
|
@@ -588,7 +595,8 @@ def send(self, topic, value=None, key=None, partition=None, timestamp_ms=None): | |
FutureProduceResult(TopicPartition(topic, partition)), | ||
-1, None, None, | ||
len(key_bytes) if key_bytes is not None else -1, | ||
len(value_bytes) if value_bytes is not None else -1 | ||
len(value_bytes) if value_bytes is not None else -1, | ||
sum(len(h_key.encode("utf-8")) + len(h_value) for h_key, h_value in headers) if headers else -1, | ||
).failure(e) | ||
|
||
def flush(self, timeout=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,10 +91,16 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): | |
compression_type=compression) | ||
magic = producer._max_usable_produce_magic() | ||
|
||
# record headers are supported in 0.11.0 | ||
if version() < (0, 11, 0): | ||
headers = None | ||
else: | ||
headers = [("Header Key", b"Header Value")] | ||
|
||
topic = random_string(5) | ||
future = producer.send( | ||
topic, | ||
value=b"Simple value", key=b"Simple key", timestamp_ms=9999999, | ||
value=b"Simple value", key=b"Simple key", headers=headers, timestamp_ms=9999999, | ||
partition=0) | ||
record = future.get(timeout=5) | ||
assert record is not None | ||
|
@@ -116,6 +122,8 @@ def test_kafka_producer_proper_record_metadata(kafka_broker, compression): | |
|
||
assert record.serialized_key_size == 10 | ||
assert record.serialized_value_size == 12 | ||
if headers: | ||
assert record.serialized_header_size == 22 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.

Can we check the exact header data here? I want to be sure we have str as keys and bytes as values.

There was a problem hiding this comment. Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.

The record here refers to the FutureRecordMetadata / RecordMetadata, which doesn't carry the actual stored values. I've verified this manually for both Python 3.6 and 2.7 with a producer & consumer running through an actual Kafka service instance. The types are str/bytes for 3.6 and unicode/str (bytes) for 2.7. |
||
|
||
# generated timestamp case is skipped for broker 0.9 and below | ||
if magic == 0: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment; the reason will be displayed to describe this comment to others. Learn more.
Is this also a list of tuples? If so, same question as below regarding using a dict. Also, where can we document this for users?