Skip to content

Commit

Permalink
Merge pull request #108 from edgenai/feat/context-hint
Browse files: browse the repository at this point in the history
added context size hint
  • Loading branch information
pedro-devv authored Mar 4, 2024
2 parents 3e73160 + 43f0144 commit 281d561
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 2 deletions.
2 changes: 2 additions & 0 deletions crates/edgen_core/src/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub struct CompletionArgs {
pub one_shot: bool,
pub seed: Option<u32>,
pub frequency_penalty: f32,
pub context_hint: Option<u32>,
}

impl Default for CompletionArgs {
Expand All @@ -58,6 +59,7 @@ impl Default for CompletionArgs {
one_shot: false,
seed: None,
frequency_penalty: 0.0,
context_hint: None,
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/edgen_rt_llama_cpp/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl UnloadingModel {
//params.seed = args.seed;
params.n_threads = threads;
params.n_threads_batch = threads;
params.n_ctx = CONTEXT_SIZE;
params.n_ctx = args.context_hint.unwrap_or(CONTEXT_SIZE);

let mut session = model_guard
.create_session(params)
Expand Down Expand Up @@ -306,7 +306,7 @@ impl UnloadingModel {
//params.seed = args.seed;
params.n_threads = threads;
params.n_threads_batch = threads;
params.n_ctx = CONTEXT_SIZE;
params.n_ctx = args.context_hint.unwrap_or(CONTEXT_SIZE);

let session = model_guard
.create_session(params)
Expand Down
8 changes: 8 additions & 0 deletions crates/edgen_server/src/openai_shim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,13 @@ pub struct CreateChatCompletionRequest<'a> {
/// Indicate if this is an isolated request, with no associated past or future context. This may allow for
/// optimisations in some implementations. Default: `false`
pub one_shot: Option<bool>,

/// A hint for how large the context will be.
///
/// # Warning
/// An unsound hint may severely degrade performance and/or inference quality, and in some cases even cause Edgen
/// to crash. Do not set this value unless you know what you are doing.
pub context_hint: Option<u32>,
}

/// A message in a chat completion.
Expand Down Expand Up @@ -633,6 +640,7 @@ pub async fn chat_completions(
let mut args = CompletionArgs {
prompt: untokenized_context,
seed: req.seed,
context_hint: req.context_hint,
..Default::default()
};

Expand Down

0 comments on commit 281d561

Please sign in to comment.