Skip to content

Commit

Permalink
Support vector search on Cosmos DB
Browse files Browse the repository at this point in the history
 Fixes #33783

 This PR introduces:

 - `IsVector()` to configure a property as a vector (embedding) in the document.
   - The distance function and dimensions are specified.
   - The data type can be specified, or otherwise is inferred.
 - `HasIndex().ForVectors()` to configure a vector index over a vector property.
 - `VectorDistance()` which translates to the Cosmos `VectorDistance` function
   - The distance function and data type are taken from the property mapping, or can be overridden.

 Known issues:

 - Float16 (Half) is not working in Cosmos--needs investigation
 - Exception on int array case--could be EF or Cosmos--needs investigation
 - Owned types mess up the materialization--this will be fixed by the ReadItem improvements I am working on
  • Loading branch information
ajcvickers committed Jun 14, 2024
1 parent 5eec6fa commit db246d8
Show file tree
Hide file tree
Showing 44 changed files with 1,679 additions and 250 deletions.
2 changes: 1 addition & 1 deletion src/EFCore.Cosmos/EFCore.Cosmos.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="Microsoft.Azure.Cosmos" Version="3.40.0" />
<PackageReference Include="Microsoft.Azure.Cosmos" Version="3.41.0-preview.0" />
</ItemGroup>

<ItemGroup>
Expand Down
46 changes: 46 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosDbFunctionsExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,50 @@ public static T CoalesceUndefined<T>(
T expression1,
T expression2)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(CoalesceUndefined)));

/// <summary>
/// Computes the distance between two vectors. The distance function and data type are the ones defined using
/// <see cref="CosmosPropertyBuilderExtensions.IsVector(Microsoft.EntityFrameworkCore.Metadata.Builders.PropertyBuilder,Microsoft.Azure.Cosmos.DistanceFunction,ulong,System.Nullable{Microsoft.Azure.Cosmos.VectorDataType})"/>.
/// </summary>
/// <remarks>
/// Intended for use in queries, where it is translated to the Cosmos <c>VectorDistance</c> function.
/// Invoking it directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
public static double VectorDistance<T>(
    this DbFunctions _,
    IEnumerable<T> vector1,
    IEnumerable<T> vector2)
    => throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));

/// <summary>
/// Returns the distance between two vectors, specifying whether brute force is used.
/// </summary>
/// <remarks>
/// This overload has no distance function or data type parameter.
/// NOTE(review): presumably the values configured on the vector property are used, as for the overload
/// without <paramref name="useBruteForce"/> - confirm against the query translation.
/// Invoking this method directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">A <see langword="bool"/> specifying how the computed value is used in an ORDER BY
/// expression. If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector
/// property is leveraged.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
public static double VectorDistance<T>(
this DbFunctions _,
IEnumerable<T> vector1,
IEnumerable<T> vector2,
[NotParameterized] bool useBruteForce)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));

/// <summary>
/// Returns the distance between two vectors, given an explicit distance function (aka similarity measure)
/// and vector data type.
/// </summary>
/// <remarks>
/// Invoking this method directly always throws.
/// </remarks>
/// <typeparam name="T">The element type of the vectors.</typeparam>
/// <param name="_">The <see cref="DbFunctions" /> instance.</param>
/// <param name="vector1">The first vector.</param>
/// <param name="vector2">The second vector.</param>
/// <param name="useBruteForce">A <see langword="bool"/> specifying how the computed value is used in an ORDER BY
/// expression. If <see langword="true"/>, then brute force is used, otherwise any index defined on the vector
/// property is leveraged.</param>
/// <param name="distanceFunction">The distance function to use.</param>
/// <param name="dataType">The vector data type to use.</param>
/// <exception cref="InvalidOperationException">Always thrown when the method is invoked directly.</exception>
public static double VectorDistance<T>(
this DbFunctions _,
IEnumerable<T> vector1,
IEnumerable<T> vector2,
[NotParameterized] bool useBruteForce,
[NotParameterized] DistanceFunction distanceFunction,
[NotParameterized] VectorDataType dataType)
=> throw new InvalidOperationException(CoreStrings.FunctionOnClient(nameof(VectorDistance)));
}
94 changes: 94 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosIndexBuilderExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;

// ReSharper disable once CheckNamespace
namespace Microsoft.EntityFrameworkCore;

/// <summary>
/// Extension methods on <see cref="IndexBuilder"/> for Azure Cosmos DB-specific index configuration.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
public static class CosmosIndexBuilderExtensions
{
    /// <summary>
    /// Configures the index as a vector index of the given <see cref="VectorIndexType"/>.
    /// </summary>
    /// <remarks>
    /// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    /// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>A builder to further configure the index.</returns>
    public static IndexBuilder ForVectors(this IndexBuilder indexBuilder, VectorIndexType? indexType)
    {
        var index = indexBuilder.Metadata;
        index.SetVectorIndexType(indexType);

        return indexBuilder;
    }

    /// <summary>
    /// Configures the index as a vector index of the given <see cref="VectorIndexType"/>.
    /// </summary>
    /// <remarks>
    /// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    /// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <typeparam name="TEntity">The entity type that the index is defined on.</typeparam>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <returns>A builder to further configure the index.</returns>
    public static IndexBuilder<TEntity> ForVectors<TEntity>(
        this IndexBuilder<TEntity> indexBuilder,
        VectorIndexType? indexType)
    {
        // Delegate to the non-generic overload and restore the generic builder type for chaining.
        var nonGenericBuilder = (IndexBuilder)indexBuilder;

        return (IndexBuilder<TEntity>)ForVectors(nonGenericBuilder, indexType);
    }

    /// <summary>
    /// Configures the index as a vector index of the given <see cref="VectorIndexType"/>, provided that
    /// <see cref="CanSetVectorIndexType"/> allows it.
    /// </summary>
    /// <remarks>
    /// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    /// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The type of vector index to create.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>
    /// The same builder instance if the configuration was applied,
    /// <see langword="null" /> otherwise.
    /// </returns>
    public static IConventionIndexBuilder? ForVectors(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        if (!indexBuilder.CanSetVectorIndexType(indexType, fromDataAnnotation))
        {
            return null;
        }

        indexBuilder.Metadata.SetVectorIndexType(indexType, fromDataAnnotation);

        return indexBuilder;
    }

    /// <summary>
    /// Returns a value indicating whether the vector index type annotation can be set on the index.
    /// </summary>
    /// <remarks>
    /// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
    /// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
    /// </remarks>
    /// <param name="indexBuilder">The builder for the index being configured.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns><see langword="true" /> if the index can be configured for vectors.</returns>
    public static bool CanSetVectorIndexType(
        this IConventionIndexBuilder indexBuilder,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
        => indexBuilder.CanSetAnnotation(
            CosmosAnnotationNames.VectorIndexType,
            indexType,
            fromDataAnnotation);
}
59 changes: 59 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosIndexExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Microsoft.EntityFrameworkCore.Cosmos.Metadata.Internal;

// ReSharper disable once CheckNamespace
namespace Microsoft.EntityFrameworkCore;

/// <summary>
/// Extension methods on index metadata for reading and writing Azure Cosmos DB-specific annotations.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
public static class CosmosIndexExtensions
{
    /// <summary>
    /// Returns the <see cref="VectorIndexType"/> to use for this index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <returns>The index type to use, or <see langword="null" /> if none is set.</returns>
    /// <exception cref="InvalidOperationException">The index is a <see cref="RuntimeIndex"/>.</exception>
    public static VectorIndexType? GetVectorIndexType(this IReadOnlyIndex index)
    {
        if (index is RuntimeIndex)
        {
            throw new InvalidOperationException(CoreStrings.RuntimeModelMissingData);
        }

        return (VectorIndexType?)index[CosmosAnnotationNames.VectorIndexType];
    }

    /// <summary>
    /// Sets the <see cref="VectorIndexType"/> to use for this index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    public static void SetVectorIndexType(this IMutableIndex index, VectorIndexType? indexType)
    {
        index.SetAnnotation(CosmosAnnotationNames.VectorIndexType, indexType);
    }

    /// <summary>
    /// Sets the <see cref="VectorIndexType"/> to use for this index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="indexType">The index type to use.</param>
    /// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
    /// <returns>The configured value.</returns>
    public static VectorIndexType? SetVectorIndexType(
        this IConventionIndex index,
        VectorIndexType? indexType,
        bool fromDataAnnotation = false)
    {
        var annotation = index.SetAnnotation(
            CosmosAnnotationNames.VectorIndexType,
            indexType,
            fromDataAnnotation);

        return (VectorIndexType?)annotation?.Value;
    }

    /// <summary>
    /// Returns the <see cref="ConfigurationSource" /> for the value returned by <see cref="GetVectorIndexType"/>.
    /// </summary>
    /// <param name="property">The index.</param>
    /// <returns>
    /// The <see cref="ConfigurationSource" /> for the vector index type, or <see langword="null" /> if the
    /// annotation is not found.
    /// </returns>
    public static ConfigurationSource? GetVectorIndexTypeConfigurationSource(this IConventionIndex property)
    {
        var annotation = property.FindAnnotation(CosmosAnnotationNames.VectorIndexType);

        return annotation?.GetConfigurationSource();
    }
}
120 changes: 120 additions & 0 deletions src/EFCore.Cosmos/Extensions/CosmosPropertyBuilderExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,109 @@ public static bool CanSetJsonProperty(
bool fromDataAnnotation = false)
=> propertyBuilder.CanSetAnnotation(CosmosAnnotationNames.PropertyName, name, fromDataAnnotation);

/// <summary>
/// Configures the property as a vector for Azure Cosmos DB, with the given distance function, number of
/// dimensions and, optionally, data type.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="propertyBuilder">The builder for the property being configured.</param>
/// <param name="distanceFunction">The distance function to use for vector comparisons.</param>
/// <param name="dimensions">The number of dimensions in the vector.</param>
/// <param name="dataType">The vector data type, or <see langword="null"/> to choose the data type automatically.</param>
/// <returns>The same builder instance so that multiple calls can be chained.</returns>
public static PropertyBuilder IsVector(
    this PropertyBuilder propertyBuilder,
    DistanceFunction distanceFunction,
    ulong dimensions,
    VectorDataType? dataType = null)
{
    var property = propertyBuilder.Metadata;
    property.SetVectorType(CreateVectorType(distanceFunction, dimensions, dataType));

    return propertyBuilder;
}

/// <summary>
/// Configures the property as a vector for Azure Cosmos DB, with the given distance function, number of
/// dimensions and, optionally, data type.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <typeparam name="TProperty">The type of the property being configured.</typeparam>
/// <param name="propertyBuilder">The builder for the property being configured.</param>
/// <param name="distanceFunction">The distance function to use for vector comparisons.</param>
/// <param name="dimensions">The number of dimensions in the vector.</param>
/// <param name="dataType">The vector data type, or <see langword="null"/> to choose the data type automatically.</param>
/// <returns>The same builder instance so that multiple calls can be chained.</returns>
public static PropertyBuilder<TProperty> IsVector<TProperty>(
    this PropertyBuilder<TProperty> propertyBuilder,
    DistanceFunction distanceFunction,
    ulong dimensions,
    VectorDataType? dataType = null)
{
    // Delegate to the non-generic overload and restore the generic builder type for chaining.
    var nonGenericBuilder = (PropertyBuilder)propertyBuilder;

    return (PropertyBuilder<TProperty>)IsVector(nonGenericBuilder, distanceFunction, dimensions, dataType);
}

/// <summary>
/// Configures the property as a vector for Azure Cosmos DB, provided that
/// <see cref="CanSetIsVector"/> allows it.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="propertyBuilder">The builder for the property being configured.</param>
/// <param name="distanceFunction">The distance function to use for vector comparisons.</param>
/// <param name="dimensions">The number of dimensions in the vector.</param>
/// <param name="dataType">The vector data type, or <see langword="null"/> to choose the data type automatically.</param>
/// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
/// <returns>
/// The same builder instance if the configuration was applied,
/// <see langword="null" /> otherwise.
/// </returns>
public static IConventionPropertyBuilder? IsVector(
    this IConventionPropertyBuilder propertyBuilder,
    DistanceFunction distanceFunction,
    ulong dimensions,
    VectorDataType? dataType = null,
    bool fromDataAnnotation = false)
{
    if (propertyBuilder.CanSetIsVector(distanceFunction, dimensions, dataType, fromDataAnnotation))
    {
        propertyBuilder.Metadata.SetVectorType(
            CreateVectorType(distanceFunction, dimensions, dataType),
            fromDataAnnotation);

        return propertyBuilder;
    }

    return null;
}

/// <summary>
/// Returns a value indicating whether the vector type annotation can be set on the property.
/// </summary>
/// <remarks>
/// See <see href="https://aka.ms/efcore-docs-modeling">Modeling entity types and relationships</see>, and
/// <see href="https://aka.ms/efcore-docs-cosmos">Accessing Azure Cosmos DB with EF Core</see> for more information and examples.
/// </remarks>
/// <param name="propertyBuilder">The builder for the property being configured.</param>
/// <param name="distanceFunction">The distance function to use for vector comparisons.</param>
/// <param name="dimensions">The number of dimensions in the vector.</param>
/// <param name="dataType">The vector data type, or <see langword="null"/> to choose the data type automatically.</param>
/// <param name="fromDataAnnotation">Indicates whether the configuration was specified using a data annotation.</param>
/// <returns><see langword="true" /> if the vector type can be set.</returns>
public static bool CanSetIsVector(
    this IConventionPropertyBuilder propertyBuilder,
    DistanceFunction distanceFunction,
    ulong dimensions,
    VectorDataType? dataType = null,
    bool fromDataAnnotation = false)
{
    // The candidate annotation value is built first, so invalid enum arguments are rejected by CreateVectorType.
    var candidate = CreateVectorType(distanceFunction, dimensions, dataType);

    return propertyBuilder.CanSetAnnotation(CosmosAnnotationNames.VectorType, candidate, fromDataAnnotation);
}

/// <summary>
/// Configures this property to be the etag concurrency token.
/// </summary>
Expand Down Expand Up @@ -136,4 +239,21 @@ public static PropertyBuilder IsETagConcurrency(this PropertyBuilder propertyBui
public static PropertyBuilder<TProperty> IsETagConcurrency<TProperty>(
this PropertyBuilder<TProperty> propertyBuilder)
=> (PropertyBuilder<TProperty>)IsETagConcurrency((PropertyBuilder)propertyBuilder);

// Validates the enum arguments and packages them, together with the dimension count, into a CosmosVectorType.
// Throws ArgumentException when distanceFunction, or a non-null dataType, is not a defined enum member.
private static CosmosVectorType CreateVectorType(DistanceFunction distanceFunction, ulong dimensions, VectorDataType? dataType)
{
    if (!Enum.IsDefined(distanceFunction))
    {
        throw new ArgumentException(
            CoreStrings.InvalidEnumValue(distanceFunction, nameof(distanceFunction), typeof(DistanceFunction)));
    }

    // A null data type is passed through unchanged; only an explicitly supplied value is validated.
    if (dataType is { } explicitDataType
        && !Enum.IsDefined(explicitDataType))
    {
        throw new ArgumentException(
            CoreStrings.InvalidEnumValue(explicitDataType, nameof(dataType), typeof(VectorDataType)));
    }

    return new CosmosVectorType(distanceFunction, dimensions, dataType);
}
}
Loading

0 comments on commit db246d8

Please sign in to comment.