Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up _determine_reference_key() by 30% in libs/langchain/langchain/smith/evaluation/runner_utils.py #27

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

codeflash-ai[bot]
Copy link

@codeflash-ai codeflash-ai bot commented Feb 16, 2024

📄 _determine_reference_key() in libs/langchain/langchain/smith/evaluation/runner_utils.py

📈 Performance went up by 30% (0.30x faster)

⏱️ Runtime went down from 13.60μs to 10.50μs

Explanation and details

(click to show)

The existing python program seems to be optimized at a good extent, but it can be rewritten as below to make it a bit faster by avoiding extra variable usage and avoiding calling list() unnecessarily.

This tweaked function has the same functionality as the original one but involves fewer operations.

Correctness verification

The new optimized code was tested for correctness. The results are listed below.

✅ 0 Passed − ⚙️ Existing Unit Tests

✅ 0 Passed − 🎨 Inspired Regression Tests

✅ 8 Passed − 🌀 Generated Regression Tests

(click to show generated tests)
# imports
import pytest  # used for our unit tests
from typing import List, Optional
from dataclasses import dataclass

# Assuming the existence of the RunEvalConfig class, we mock it here for our tests
@dataclass
class RunEvalConfig:
    reference_key: Optional[str] = None
from langchain.smith.evaluation.runner_utils import _determine_reference_key
# unit tests

# Test when config has a valid reference key and example_outputs is None
def test_valid_reference_key_no_example_outputs():
    config = RunEvalConfig(reference_key="valid_key")
    assert _determine_reference_key(config, None) == "valid_key"

# Test when config has a valid reference key and it is in example_outputs
def test_valid_reference_key_in_example_outputs():
    config = RunEvalConfig(reference_key="valid_key")
    example_outputs = ["valid_key"]
    assert _determine_reference_key(config, example_outputs) == "valid_key"

# Test when config has a valid reference key but it is not in example_outputs
def test_valid_reference_key_not_in_example_outputs():
    config = RunEvalConfig(reference_key="valid_key")
    example_outputs = ["other_key"]
    with pytest.raises(ValueError):
        _determine_reference_key(config, example_outputs)

# Test when config does not have a reference key and example_outputs has one item
def test_no_reference_key_single_example_output():
    config = RunEvalConfig()
    example_outputs = ["only_key"]
    assert _determine_reference_key(config, example_outputs) == "only_key"

# Test when config does not have a reference key and example_outputs is None
def test_no_reference_key_no_example_outputs():
    config = RunEvalConfig()
    assert _determine_reference_key(config, None) is None

# Test when config does not have a reference key and example_outputs has multiple items
def test_no_reference_key_multiple_example_outputs():
    config = RunEvalConfig()
    example_outputs = ["key1", "key2"]
    assert _determine_reference_key(config, example_outputs) is None

# Test when config has a reference key set to an empty string
def test_empty_string_reference_key():
    config = RunEvalConfig(reference_key="")
    assert _determine_reference_key(config, None) == ""

# Test when config has a reference key set to a falsy value
def test_falsy_reference_key():
    config = RunEvalConfig(reference_key=False)  # Assuming False can be a valid type for reference_key
    example_outputs = ["False"]
    assert _determine_reference_key(config, example_outputs) == False

# Test when example_outputs contains non-string elements
def test_example_outputs_with_non_string_elements():
    config = RunEvalConfig()
    example_outputs = [123, "key"]
    with pytest.raises(TypeError):
        _determine_reference_key(config, example_outputs)

# Test when example_outputs is a different type (e.g., a single string)
def test_example_outputs_with_incorrect_type():
    config = RunEvalConfig()
    example_outputs = "key"
    with pytest.raises(TypeError):
        _determine_reference_key(config, example_outputs)

# Test when example_outputs contains a large number of elements
@pytest.mark.parametrize("key", ["valid_key", "other_key"])
def test_large_example_outputs(key):
    config = RunEvalConfig(reference_key=key)
    example_outputs = [f"key_{i}" for i in range(1000)]
    if key == "valid_key":
        example_outputs.append(key)
        assert _determine_reference_key(config, example_outputs) == key
    else:
        with pytest.raises(ValueError):
            _determine_reference_key(config, example_outputs)

@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by CodeFlash AI label Feb 16, 2024
@codeflash-ai codeflash-ai bot requested a review from aphexcx February 16, 2024 09:41
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
⚡️ codeflash Optimization PR opened by CodeFlash AI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

0 participants