inference.proto
syntax = "proto3";

package org.pytorch.serve.grpc.inference;

import "google/protobuf/empty.proto";
import "google/rpc/status.proto";

option java_multiple_files = true;
message PredictionsRequest {
    // Name of model.
    string model_name = 1; // required

    // Version of model to run prediction on.
    string model_version = 2; // optional

    // Input data for model prediction.
    map<string, bytes> input = 3; // required

    // SequenceId is required for the StreamPredictions2 API.
    optional string sequence_id = 4; // optional
}
message PredictionResponse {
    // Response content for prediction.
    bytes prediction = 1;

    // SequenceId is required for the StreamPredictions2 API.
    optional string sequence_id = 2; // optional

    // Error information for the StreamPredictions2 API.
    optional google.rpc.Status status = 3; // optional
}
message TorchServeHealthResponse {
    // TorchServe health status.
    string health = 1;
}
service InferenceAPIsService {
    // Check the health status of the TorchServe server.
    rpc Ping(google.protobuf.Empty) returns (TorchServeHealthResponse) {}

    // Entry point for inference using the default model version.
    rpc Predictions(PredictionsRequest) returns (PredictionResponse) {}

    // Streaming response for an inference request.
    rpc StreamPredictions(PredictionsRequest) returns (stream PredictionResponse) {}

    // Bidirectional streaming of inference requests and responses.
    rpc StreamPredictions2(stream PredictionsRequest) returns (stream PredictionResponse) {}
}
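
For context, here is a minimal Python client sketch against this service. It assumes stubs generated from this file with `grpcio-tools` and TorchServe's default gRPC inference port 7070; the model name `resnet-18`, the input key `data`, and the file `kitten.jpg` are illustrative placeholders, not values defined by this proto.

```python
# A minimal client sketch, assuming stubs were generated with:
#   python -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. inference.proto
# (google/rpc/status.proto is provided by the googleapis-common-protos package.)
import grpc
from google.protobuf import empty_pb2

import inference_pb2
import inference_pb2_grpc

# 7070 is TorchServe's default gRPC inference port; adjust if configured differently.
channel = grpc.insecure_channel("localhost:7070")
stub = inference_pb2_grpc.InferenceAPIsServiceStub(channel)

# Health check via the Ping RPC.
health = stub.Ping(empty_pb2.Empty())
print(health.health)

# Unary inference: model_name and input are required; model_version is optional.
with open("kitten.jpg", "rb") as f:  # hypothetical input file
    request = inference_pb2.PredictionsRequest(
        model_name="resnet-18",   # hypothetical model name
        input={"data": f.read()},  # "data" is an assumed input key
    )
response = stub.Predictions(request)
print(response.prediction)

# Server-streaming inference: iterate over partial responses as they arrive.
for chunk in stub.StreamPredictions(request):
    print(chunk.prediction)
```

The `StreamPredictions2` RPC follows the same pattern, except the client passes an iterator of `PredictionsRequest` messages and, per the comments above, each request must carry a `sequence_id`.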