diff --git a/docs/docs/guides/evaluation/string/json.ipynb b/docs/docs/guides/evaluation/string/json.ipynb index baaaa0afb34cb..a669e6302b0ac 100644 --- a/docs/docs/guides/evaluation/string/json.ipynb +++ b/docs/docs/guides/evaluation/string/json.ipynb @@ -221,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "id": "7a8f3ec5-1cde-4b0e-80cd-ac0ac290d375", "metadata": {}, "outputs": [ @@ -261,11 +261,102 @@ "print(result)" ] }, + { + "cell_type": "markdown", + "id": "6b15d18e-9b97-434f-905c-70acd4c35aea", + "metadata": {}, + "source": [ + "## JsonSchemaEvaluator\n", + "\n", + "The `JsonSchemaEvaluator` validates a JSON prediction against a provided JSON schema. If the prediction conforms to the schema, it returns a score of True (indicating no errors). Otherwise, it returns a score of False (indicating an error).\n", + "\n", + "### Overview:\n", + "- **Requires Input?**: No\n", + "- **Requires Reference?**: Yes (A JSON schema)\n", + "- **Score**: True (No errors) or False (Error occurred)" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "85afcf33-d2f4-406e-9d8f-15dc0a4772f2", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'score': True}\n" + ] + } + ], + "source": [ + "from langchain.evaluation import JsonSchemaEvaluator\n", + "\n", + "evaluator = JsonSchemaEvaluator()\n", + "# Equivalently\n", + "# evaluator = load_evaluator(\"json_schema_validation\")\n", + "\n", + "result = evaluator.evaluate_strings(\n", + " prediction='{\"name\": \"John\", \"age\": 30}',\n", + " reference={\n", + " \"type\": \"object\",\n", + " \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}},\n", + " },\n", + ")\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "bb5b89f6-0c87-4335-9091-55fd67a0565f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ 
+ "{'score': True}\n" + ] + } + ], + "source": [ + "result = evaluator.evaluate_strings(\n", + " prediction='{\"name\": \"John\", \"age\": 30}',\n", + " reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}}}',\n", + ")\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ff914d24-36bc-482a-a9ba-259cd0dd2a52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'score': False, 'reasoning': \"\"}\n" + ] + } + ], + "source": [ + "result = evaluator.evaluate_strings(\n", + " prediction='{\"name\": \"John\", \"age\": 30}',\n", + " reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"},'\n", + " '\"age\": {\"type\": \"integer\", \"minimum\": 66}}}',\n", + ")\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b073f12d-4603-481c-8081-fab1af6bfcfe", + "metadata": {}, "outputs": [], "source": [] } diff --git a/libs/langchain/langchain/evaluation/__init__.py b/libs/langchain/langchain/evaluation/__init__.py index dd87630b5637d..b98065208c671 100644 --- a/libs/langchain/langchain/evaluation/__init__.py +++ b/libs/langchain/langchain/evaluation/__init__.py @@ -74,6 +74,7 @@ JsonValidityEvaluator, ) from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator +from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator from langchain.evaluation.schema import ( @@ -122,4 +123,5 @@ "JsonValidityEvaluator", "JsonEqualityEvaluator", "JsonEditDistanceEvaluator", + "JsonSchemaEvaluator", ] diff --git a/libs/langchain/langchain/evaluation/loading.py b/libs/langchain/langchain/evaluation/loading.py index ac218f0ca5869..0f1788be9ecbd 100644 --- 
a/libs/langchain/langchain/evaluation/loading.py +++ b/libs/langchain/langchain/evaluation/loading.py @@ -20,6 +20,7 @@ JsonValidityEvaluator, ) from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator +from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator @@ -88,6 +89,7 @@ def load_dataset(uri: str) -> List[Dict]: EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator, EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator, EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator, + EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator, EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator, EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator, } diff --git a/libs/langchain/langchain/evaluation/parsing/base.py b/libs/langchain/langchain/evaluation/parsing/base.py index 1504297419055..c3a28f7ce586f 100644 --- a/libs/langchain/langchain/evaluation/parsing/base.py +++ b/libs/langchain/langchain/evaluation/parsing/base.py @@ -51,7 +51,7 @@ def _evaluate_strings( prediction: str, input: Optional[str] = None, reference: Optional[str] = None, - **kwargs: Any + **kwargs: Any, ) -> dict: """Evaluate the prediction string. @@ -134,7 +134,7 @@ def _evaluate_strings( prediction: str, input: Optional[str] = None, reference: Optional[str] = None, - **kwargs: Any + **kwargs: Any, ) -> dict: """Evaluate the prediction string. 
diff --git a/libs/langchain/langchain/evaluation/parsing/json_distance.py b/libs/langchain/langchain/evaluation/parsing/json_distance.py index 93287136d3526..904ad5321fbc5 100644 --- a/libs/langchain/langchain/evaluation/parsing/json_distance.py +++ b/libs/langchain/langchain/evaluation/parsing/json_distance.py @@ -38,7 +38,7 @@ def __init__( self, string_distance: Optional[Callable[[str, str], float]] = None, canonicalize: Optional[Callable[[Any], Any]] = None, - **kwargs: Any + **kwargs: Any, ) -> None: super().__init__() if string_distance is not None: @@ -58,7 +58,9 @@ def __init__( self._canonicalize = canonicalize else: self._canonicalize = lambda x: json.dumps( - x, separators=(",", ":"), sort_keys=True # eliminate whitespace + x, + separators=(",", ":"), + sort_keys=True, # eliminate whitespace ) @property @@ -83,7 +85,7 @@ def _evaluate_strings( prediction: str, input: Optional[str] = None, reference: Optional[str] = None, - **kwargs: Any + **kwargs: Any, ) -> dict: parsed = self._canonicalize(self._parse_json(prediction)) label = self._canonicalize(self._parse_json(reference)) diff --git a/libs/langchain/langchain/evaluation/parsing/json_schema.py b/libs/langchain/langchain/evaluation/parsing/json_schema.py new file mode 100644 index 0000000000000..6f0473d99845f --- /dev/null +++ b/libs/langchain/langchain/evaluation/parsing/json_schema.py @@ -0,0 +1,95 @@ +from typing import Any, Union + +from langchain.evaluation.schema import StringEvaluator +from langchain.output_parsers.json import parse_json_markdown + + +class JsonSchemaEvaluator(StringEvaluator): + """An evaluator that validates a JSON prediction against a JSON schema reference. + + This evaluator checks if a given JSON prediction conforms to the provided JSON schema. + If the prediction is valid, the score is True (no errors). Otherwise, the score is False (error occurred). + + Attributes: + requires_input (bool): Whether the evaluator requires input. 
+ requires_reference (bool): Whether the evaluator requires reference. + evaluation_name (str): The name of the evaluation. + + Examples: + evaluator = JsonSchemaEvaluator() + result = evaluator.evaluate_strings( + prediction='{"name": "John", "age": 30}', + reference={ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"} + } + } + ) + assert result["score"] is not None + + """ # noqa: E501 + + def __init__(self, **kwargs: Any) -> None: + """Initializes the JsonSchemaEvaluator. + + Args: + **kwargs: Additional keyword arguments. + + Raises: + ImportError: If the jsonschema package is not installed. + """ + super().__init__() + try: + import jsonschema # noqa: F401 + except ImportError: + raise ImportError( + "The JsonSchemaEvaluator requires the jsonschema package." + " Please install it with `pip install jsonschema`." + ) + + @property + def requires_input(self) -> bool: + """Returns whether the evaluator requires input.""" + return False + + @property + def requires_reference(self) -> bool: + """Returns whether the evaluator requires reference.""" + return True + + @property + def evaluation_name(self) -> str: + """Returns the name of the evaluation.""" + return "json_schema_validation" + + def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]: + if isinstance(node, str): + return parse_json_markdown(node) + elif hasattr(node, "schema") and callable(getattr(node, "schema")): + # Pydantic model + return getattr(node, "schema")() + return node + + def _validate(self, prediction: Any, schema: Any) -> dict: + from jsonschema import ValidationError, validate # noqa: F401 + + try: + validate(instance=prediction, schema=schema) + return { + "score": True, + } + except ValidationError as e: + return {"score": False, "reasoning": repr(e)} + + def _evaluate_strings( + self, + prediction: Union[str, Any], + input: Union[str, Any] = None, + reference: Union[str, Any] = None, + **kwargs: Any, + ) -> 
dict: + parsed_prediction = self._parse_json(prediction) + schema = self._parse_json(reference) + return self._validate(parsed_prediction, schema) diff --git a/libs/langchain/langchain/evaluation/schema.py b/libs/langchain/langchain/evaluation/schema.py index 43e8ed92edca2..86fdbaf016583 100644 --- a/libs/langchain/langchain/evaluation/schema.py +++ b/libs/langchain/langchain/evaluation/schema.py @@ -6,7 +6,7 @@ from abc import ABC, abstractmethod from enum import Enum from functools import partial -from typing import Any, Optional, Sequence, Tuple +from typing import Any, Optional, Sequence, Tuple, Union from warnings import warn from langchain.chains.base import Chain @@ -66,6 +66,8 @@ class EvaluatorType(str, Enum): """Check if a prediction is equal to a reference JSON.""" JSON_EDIT_DISTANCE = "json_edit_distance" """Compute the edit distance between two JSON strings after canonicalization.""" + JSON_SCHEMA_VALIDATION = "json_schema_validation" + """Check if a prediction is valid JSON according to a JSON schema.""" class LLMEvalChain(Chain): @@ -144,9 +146,9 @@ def requires_reference(self) -> bool: def _evaluate_strings( self, *, - prediction: str, - reference: Optional[str] = None, - input: Optional[str] = None, + prediction: Union[str, Any], + reference: Optional[Union[str, Any]] = None, + input: Optional[Union[str, Any]] = None, **kwargs: Any, ) -> dict: """Evaluate Chain or LLM output, based on optional input and label. @@ -167,9 +169,9 @@ def _evaluate_strings( async def _aevaluate_strings( self, *, - prediction: str, - reference: Optional[str] = None, - input: Optional[str] = None, + prediction: Union[str, Any], + reference: Optional[Union[str, Any]] = None, + input: Optional[Union[str, Any]] = None, **kwargs: Any, ) -> dict: """Asynchronously evaluate Chain or LLM output, based on optional input and label. 
diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index c3db3ca4b11f3..187cfff96e03d 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3790,7 +3790,6 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, - {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -4598,16 +4597,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -7728,7 +7717,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -7736,15 +7724,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -7761,7 +7742,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -7769,7 +7749,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -8733,11 +8712,6 @@ files = [ {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"}, {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"}, {file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"}, - {file = "scikit_learn-1.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ef540e09873e31569bc8b02c8a9f745ee04d8e1263255a15c9969f6f5caa627f"}, - {file = "scikit_learn-1.3.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9147a3a4df4d401e618713880be023e36109c85d8569b3bf5377e6cd3fecdeac"}, - {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2cd3634695ad192bf71645702b3df498bd1e246fc2d529effdb45a06ab028b4"}, - {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043"}, - {file = "scikit_learn-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e1aa8f206d0de814b81b41d60c1ce31f7f2c7354597af38fae46d9c47c45122"}, {file = 
"scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"}, {file = "scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"}, {file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"}, @@ -11041,7 +11015,7 @@ cli = ["typer"] cohere = ["cohere"] docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"] +extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", 
"rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"] javascript = ["esprima"] llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"] openai = ["openai", "tiktoken"] @@ -11051,4 +11025,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "6bf06e81190f228675452f1a7581614898c983d27f2d56ae9ddd92119c114b03" +content-hash = "19dcb9abd0bda24034e36b571e7ac04d432f47281a80fdc5d4a9810add60966b" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 0f7e1a2d2bceb..ac5f4ac5c1ab1 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -114,6 +114,7 @@ cassio = {version = "^0.1.0", optional = true} rdflib = {version = "^6.3.2", optional = true} sympy = {version = "^1.12", optional = true} rapidfuzz = {version = "^3.1.1", optional = true} +jsonschema = {version = ">1", optional = true} langsmith = "~0.0.52" rank-bm25 = {version = "^0.2.2", optional = true} amadeus = {version = ">=8.1.0", optional = true} @@ -350,6 +351,7 @@ extended_testing = [ "openai", "sympy", "rapidfuzz", + "jsonschema", "openai", "rank-bm25", "geopandas", diff --git a/libs/langchain/tests/unit_tests/evaluation/parsing/test_json_schema.py b/libs/langchain/tests/unit_tests/evaluation/parsing/test_json_schema.py new file mode 100644 index 0000000000000..d200c35827079 --- /dev/null +++ b/libs/langchain/tests/unit_tests/evaluation/parsing/test_json_schema.py @@ -0,0 +1,77 @@ +import pytest + +from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator + + +@pytest.fixture +def json_schema_evaluator() -> JsonSchemaEvaluator: + return JsonSchemaEvaluator() + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_requires_input( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + assert 
json_schema_evaluator.requires_input is False + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_requires_reference( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + assert json_schema_evaluator.requires_reference is True + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_evaluation_name( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + assert json_schema_evaluator.evaluation_name == "json_schema_validation" + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_valid_prediction( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + prediction = '{"name": "John", "age": 30}' + reference = { + "type": "object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + } + result = json_schema_evaluator._evaluate_strings( + prediction=prediction, reference=reference + ) + assert result["score"] is True + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_invalid_prediction( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + prediction = '{"name": "John", "age": "30"}' # age is a string instead of integer + reference = { + "type": "object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + } + result = json_schema_evaluator._evaluate_strings( + prediction=prediction, reference=reference + ) + assert result["score"] is False + assert "reasoning" in result + + +@pytest.mark.requires("jsonschema") +def test_json_schema_evaluator_missing_property( + json_schema_evaluator: JsonSchemaEvaluator, +) -> None: + prediction = '{"name": "John"}' # age property is missing + reference = { + "type": "object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + "required": ["name", "age"], + } + result = json_schema_evaluator._evaluate_strings( + prediction=prediction, reference=reference + ) + assert result["score"] is False + assert "reasoning" in result