Skip to content

Commit 8a8b0b5

Browse files
adamsitnik and westey-m authored
.Net MEVD: Port the Pinecone connector to use Pinecone.Client (#10952)
This resurrects #10788 and is built on top of #10944, which I hope will get merged first. Fixes #10415. --------- Co-authored-by: westey <[email protected]>
1 parent f001f61 commit 8a8b0b5

File tree

40 files changed

+1192
-1557
lines changed

40 files changed

+1192
-1557
lines changed

dotnet/Directory.Packages.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
<PackageVersion Include="OpenTelemetry.Instrumentation.Http" Version="1.9.0" />
6666
<PackageVersion Include="OpenTelemetry.Instrumentation.Runtime" Version="1.9.0" />
6767
<PackageVersion Include="PdfPig" Version="0.1.10" />
68-
<PackageVersion Include="Pinecone.NET" Version="2.1.1" />
68+
<PackageVersion Include="Pinecone.Client" Version="3.0.0" />
6969
<PackageVersion Include="Prompty.Core" Version="0.0.23-alpha" />
7070
<PackageVersion Include="PuppeteerSharp" Version="20.0.5" />
7171
<PackageVersion Include="System.Diagnostics.DiagnosticSource" Version="8.0.1" />

dotnet/SK-dotnet.sln

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelContextProtocol", "sam
487487
EndProject
488488
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SqlServerIntegrationTests", "src\VectorDataIntegrationTests\SqlServerIntegrationTests\SqlServerIntegrationTests.csproj", "{A5E6193C-8431-4C6E-B674-682CB41EAA0C}"
489489
EndProject
490+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PineconeIntegrationTests", "src\VectorDataIntegrationTests\PineconeIntegrationTests\PineconeIntegrationTests.csproj", "{E9A74E0C-BC02-4DDD-A487-89847EDF8026}"
491+
EndProject
490492
Global
491493
GlobalSection(SolutionConfigurationPlatforms) = preSolution
492494
Debug|Any CPU = Debug|Any CPU
@@ -1334,6 +1336,12 @@ Global
13341336
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Publish|Any CPU.Build.0 = Debug|Any CPU
13351337
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.ActiveCfg = Release|Any CPU
13361338
{A5E6193C-8431-4C6E-B674-682CB41EAA0C}.Release|Any CPU.Build.0 = Release|Any CPU
1339+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
1340+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Debug|Any CPU.Build.0 = Debug|Any CPU
1341+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Publish|Any CPU.ActiveCfg = Release|Any CPU
1342+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Publish|Any CPU.Build.0 = Release|Any CPU
1343+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Release|Any CPU.ActiveCfg = Release|Any CPU
1344+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026}.Release|Any CPU.Build.0 = Release|Any CPU
13371345
EndGlobalSection
13381346
GlobalSection(SolutionProperties) = preSolution
13391347
HideSolutionNode = FALSE
@@ -1516,6 +1524,7 @@ Global
15161524
{8C658E1E-83C8-4127-B8BF-27A638A45DDD} = {6823CD5E-2ABE-41EB-B865-F86EC13F0CF9}
15171525
{B16AC373-3DA8-4505-9510-110347CD635D} = {5D4C0700-BBB5-418F-A7B2-F392B9A18263}
15181526
{A5E6193C-8431-4C6E-B674-682CB41EAA0C} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
1527+
{E9A74E0C-BC02-4DDD-A487-89847EDF8026} = {4F381919-F1BE-47D8-8558-3187ED04A84F}
15191528
EndGlobalSection
15201529
GlobalSection(ExtensibilityGlobals) = postSolution
15211530
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}

dotnet/src/Connectors/Connectors.Memory.Pinecone/Connectors.Memory.Pinecone.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
</PropertyGroup>
2020

2121
<ItemGroup>
22-
<PackageReference Include="Pinecone.NET" />
22+
<PackageReference Include="Pinecone.Client" />
2323
<PackageReference Include="System.Text.Json" />
2424
</ItemGroup>
2525

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using System.Collections;
5+
using System.Collections.Generic;
6+
using System.Diagnostics;
7+
using System.Diagnostics.CodeAnalysis;
8+
using System.Linq;
9+
using System.Linq.Expressions;
10+
using System.Reflection;
11+
using System.Runtime.CompilerServices;
12+
using Pinecone;
13+
14+
namespace Microsoft.SemanticKernel.Connectors.Pinecone;
15+
16+
// This class is a modification of MongoDBFilterTranslator that uses the same query language
17+
// (https://docs.pinecone.io/guides/data/understanding-metadata#metadata-query-language),
18+
// with the difference of representing everything as Metadata rather than BsonDocument.
19+
// To represent collections of any kind, we use List<MetadataValue>,
20+
// as we sometimes need to extend the collection (with for example another condition).
21+
internal class PineconeFilterTranslator
22+
{
23+
private IReadOnlyDictionary<string, string> _storagePropertyNames = null!;
24+
private ParameterExpression _recordParameter = null!;
25+
26+
/// <summary>
/// Translates a LINQ filter lambda into a Pinecone metadata filter.
/// </summary>
/// <param name="lambdaExpression">The filter lambda; it is expected to declare exactly one parameter (the record).</param>
/// <param name="storagePropertyNames">Lookup from record member names to the storage property names used in the filter.</param>
/// <returns>The <see cref="Metadata"/> obtained by translating the body of the lambda.</returns>
internal Metadata Translate(LambdaExpression lambdaExpression, IReadOnlyDictionary<string, string> storagePropertyNames)
{
    // Both fields are consulted later, while member accesses in the body are resolved.
    this._storagePropertyNames = storagePropertyNames;

    var parameters = lambdaExpression.Parameters;
    Debug.Assert(parameters.Count == 1);
    this._recordParameter = parameters[0];

    var body = lambdaExpression.Body;
    return this.Translate(body);
}
35+
36+
/// <summary>
/// Dispatches an expression node to the translator that handles its shape.
/// </summary>
/// <param name="node">The expression node to translate.</param>
/// <returns>The metadata filter for <paramref name="node"/>.</returns>
/// <exception cref="NotSupportedException">The node is not one of the handled shapes.</exception>
private Metadata Translate(Expression? node)
{
    switch (node)
    {
        // ==, !=, >, >=, <, <=
        case BinaryExpression
        {
            NodeType: ExpressionType.Equal or ExpressionType.NotEqual
                or ExpressionType.GreaterThan or ExpressionType.GreaterThanOrEqual
                or ExpressionType.LessThan or ExpressionType.LessThanOrEqual
        } comparison:
            return this.TranslateEqualityComparison(comparison);

        // &&, ||
        case BinaryExpression { NodeType: ExpressionType.AndAlso or ExpressionType.OrElse } logical:
            return this.TranslateAndOr(logical);

        // !
        case UnaryExpression { NodeType: ExpressionType.Not } negation:
            return this.TranslateNot(negation);

        // A bare bool field used as the filter (r => r.Bool) is treated as r.Bool == true.
        // Other member accesses are handled inside e.g. TranslateEqualityComparison.
        case MemberExpression boolMember when boolMember.Type == typeof(bool) && this.TryTranslateFieldAccess(boolMember, out _):
            return this.TranslateEqualityComparison(Expression.Equal(boolMember, Expression.Constant(true)));

        case MethodCallExpression call:
            return this.TranslateMethodCall(call);

        default:
            throw new NotSupportedException("The following NodeType is unsupported: " + node?.NodeType);
    }
}
60+
61+
/// <summary>
/// Translates an equality or ordering comparison between a record field and a constant
/// (or captured variable) into a Pinecone metadata filter.
/// </summary>
/// <param name="binary">
/// A binary expression whose node type is one of ==, !=, &gt;, &gt;=, &lt;, &lt;=.
/// The field may be on either side of the operator.
/// </param>
/// <returns>
/// <c>{ field: value }</c> for equality, otherwise <c>{ field: { $op: value } }</c>.
/// </returns>
/// <exception cref="NotSupportedException">
/// The expression is not a field/constant comparison, or the constant is <see langword="null"/>.
/// </exception>
private Metadata TranslateEqualityComparison(BinaryExpression binary)
{
    string? storagePropertyName;
    object? value;
    ExpressionType comparison;

    if (this.TryTranslateFieldAccess(binary.Left, out storagePropertyName) && TryGetConstant(binary.Right, out value))
    {
        // field <op> constant: the operator applies to the field as written.
        comparison = binary.NodeType;
    }
    else if (this.TryTranslateFieldAccess(binary.Right, out storagePropertyName) && TryGetConstant(binary.Left, out value))
    {
        // constant <op> field: the emitted filter is always phrased from the field's point of view,
        // so ordering operators must be mirrored (5 < r.Int means r.Int > 5).
        // Equality and inequality are symmetric and pass through unchanged.
        comparison = binary.NodeType switch
        {
            ExpressionType.GreaterThan => ExpressionType.LessThan,
            ExpressionType.GreaterThanOrEqual => ExpressionType.LessThanOrEqual,
            ExpressionType.LessThan => ExpressionType.GreaterThan,
            ExpressionType.LessThanOrEqual => ExpressionType.GreaterThanOrEqual,
            var symmetric => symmetric
        };
    }
    else
    {
        throw new NotSupportedException("Invalid equality/comparison");
    }

    if (value is null)
    {
        throw new NotSupportedException("Pinecone does not support null checks in vector search pre-filters");
    }

    // Short form of equality (instead of $eq)
    if (comparison is ExpressionType.Equal)
    {
        return new Metadata { [storagePropertyName] = ToMetadata(value) };
    }

    var filterOperator = comparison switch
    {
        ExpressionType.NotEqual => "$ne",
        ExpressionType.GreaterThan => "$gt",
        ExpressionType.GreaterThanOrEqual => "$gte",
        ExpressionType.LessThan => "$lt",
        ExpressionType.LessThanOrEqual => "$lte",

        _ => throw new UnreachableException()
    };

    return new Metadata { [storagePropertyName] = new Metadata { [filterOperator] = ToMetadata(value) } };
}
93+
94+
/// <summary>
/// Translates a logical AND / OR into a <c>$and</c> / <c>$or</c> filter, merging operands into an
/// existing operand list when either side is already a filter with the same operator.
/// </summary>
/// <param name="andOr">A binary expression whose node type is AndAlso or OrElse.</param>
/// <returns>The combined filter; this may be the (mutated) filter produced for one of the operands.</returns>
private Metadata TranslateAndOr(BinaryExpression andOr)
{
    string logicalOperator;
    switch (andOr.NodeType)
    {
        case ExpressionType.AndAlso:
            logicalOperator = "$and";
            break;
        case ExpressionType.OrElse:
            logicalOperator = "$or";
            break;
        default:
            throw new UnreachableException();
    }

    Metadata leftFilter = this.Translate(andOr.Left);
    Metadata rightFilter = this.Translate(andOr.Right);

    // Non-null only when the side is already a single-entry filter keyed by the same operator.
    List<MetadataValue?>? leftOperands = GetListOrNull(leftFilter, logicalOperator);
    List<MetadataValue?>? rightOperands = GetListOrNull(rightFilter, logicalOperator);

    if (leftOperands is not null)
    {
        if (rightOperands is not null)
        {
            // Both sides use the same operator: append the right operands to the left list.
            leftOperands.AddRange(rightOperands);
        }
        else
        {
            leftOperands.Add(rightFilter);
        }

        return leftFilter;
    }

    if (rightOperands is not null)
    {
        // Keep the left operand first so operand order follows the source expression.
        rightOperands.Insert(0, leftFilter);
        return rightFilter;
    }

    return new Metadata { [logicalOperator] = new MetadataValue(new List<MetadataValue?> { leftFilter, rightFilter }) };
}
123+
124+
/// <summary>
/// Translates a logical NOT. Only negated equality/inequality, a negated bool field and a negated
/// <c>$in</c> (rewritten to <c>$nin</c>) are handled.
/// </summary>
/// <param name="not">The unary Not expression.</param>
/// <returns>The filter equivalent to the negated operand.</returns>
/// <exception cref="NotSupportedException">The operand is none of the handled forms.</exception>
private Metadata TranslateNot(UnaryExpression not)
{
    // !(a == b) is translated as a != b, and !(a != b) as a == b.
    if (not.Operand is BinaryExpression { NodeType: ExpressionType.Equal or ExpressionType.NotEqual } comparison)
    {
        var invertedType = comparison.NodeType is ExpressionType.Equal
            ? ExpressionType.NotEqual
            : ExpressionType.Equal;

        return this.TranslateEqualityComparison(
            Expression.MakeBinary(invertedType, comparison.Left, comparison.Right));
    }

    // Negated bool field (Filter => r => !r.Bool) is translated as r.Bool == false.
    if (not.Operand is MemberExpression boolMember
        && boolMember.Type == typeof(bool)
        && this.TryTranslateFieldAccess(boolMember, out _))
    {
        return this.TranslateEqualityComparison(Expression.Equal(boolMember, Expression.Constant(false)));
    }

    var translated = this.Translate(not.Operand);

    // NOT over { field: { $in: [...] } } is rewritten to { field: { $nin: [...] } }.
    if (translated.Count == 1 && translated.First() is { Key: var fieldName, Value: MetadataValue nested } && nested.Value is Metadata nestedMetadata
        && GetListOrNull(nestedMetadata, "$in") is List<MetadataValue> values)
    {
        return new Metadata { [fieldName] = new Metadata { ["$nin"] = values } };
    }

    throw new NotSupportedException("Pinecone does not support the NOT operator in vector search pre-filters");
}
152+
153+
/// <summary>
/// Translates a method call. Only <c>Enumerable.Contains(source, item)</c> and
/// <c>List&lt;T&gt;.Contains(item)</c> are recognized.
/// </summary>
/// <param name="methodCall">The method call expression.</param>
/// <returns>The filter produced by <c>TranslateContains</c>.</returns>
/// <exception cref="NotSupportedException">The call is not one of the recognized Contains forms.</exception>
private Metadata TranslateMethodCall(MethodCallExpression methodCall)
{
    var method = methodCall.Method;

    if (method.Name == nameof(Enumerable.Contains))
    {
        var arguments = methodCall.Arguments;

        // Enumerable.Contains(): static call, the source is the first argument.
        if (arguments.Count == 2 && method.DeclaringType == typeof(Enumerable))
        {
            return this.TranslateContains(arguments[0], arguments[1]);
        }

        // List.Contains(): instance call, the source is the call target.
        if (method.DeclaringType is { IsGenericType: true } declaringType
            && methodCall.Object is Expression instance
            && arguments.Count == 1
            && declaringType.GetGenericTypeDefinition() == typeof(List<>))
        {
            return this.TranslateContains(instance, arguments[0]);
        }
    }

    throw new NotSupportedException($"Unsupported method call: {methodCall.Method.DeclaringType?.Name}.{methodCall.Method.Name}");
}
174+
175+
/// <summary>
/// Translates <c>source.Contains(item)</c>, where <paramref name="source"/> is an inline array or a
/// constant/captured enumerable and <paramref name="item"/> is a record field, into an <c>$in</c> filter.
/// </summary>
/// <param name="source">The collection being searched.</param>
/// <param name="item">The value being looked for; it must be a record field access.</param>
/// <returns><c>{ field: { $in: [...] } }</c>.</returns>
/// <exception cref="NotSupportedException">
/// The source is a record field, contains a non-constant element, is not a supported enumerable,
/// or <paramref name="item"/> is not a record field access.
/// </exception>
private Metadata TranslateContains(Expression source, Expression item)
{
    // Contains over array column (r => r.Strings.Contains("foo"))
    if (this.TryTranslateFieldAccess(source, out _))
    {
        throw new NotSupportedException("Pinecone does not support Contains within array fields ($elemMatch) in vector search pre-filters");
    }

    // Contains over inline enumerable: every element has to resolve to a constant.
    if (source is NewArrayExpression inlineArray)
    {
        var constants = new List<object?>(inlineArray.Expressions.Count);

        foreach (var elementExpression in inlineArray.Expressions)
        {
            if (!TryGetConstant(elementExpression, out var constant))
            {
                throw new NotSupportedException("Invalid element in array");
            }

            constants.Add(constant);
        }

        return BuildInFilter(constants, item);
    }

    // Contains over captured enumerable (we inline); strings are enumerable too, but are not collections here.
    if (TryGetConstant(source, out var captured) && captured is IEnumerable enumerable and not string)
    {
        return BuildInFilter(enumerable, item);
    }

    throw new NotSupportedException("Unsupported Contains expression");

    // Builds { field: { $in: [elements...] } } for the field referenced by 'candidate'.
    Metadata BuildInFilter(IEnumerable elements, Expression candidate)
    {
        if (!this.TryTranslateFieldAccess(candidate, out var storagePropertyName))
        {
            throw new NotSupportedException("Unsupported item type in Contains");
        }

        var inClause = new Metadata
        {
            ["$in"] = new MetadataValue(elements.Cast<object>().Select(ToMetadata).ToList())
        };

        return new Metadata { [storagePropertyName] = inClause };
    }
}
224+
225+
/// <summary>
/// Checks whether <paramref name="expression"/> is a member access directly on the record parameter
/// and, if so, resolves the member name to its storage property name.
/// </summary>
/// <param name="expression">The expression to inspect.</param>
/// <param name="storagePropertyName">The resolved storage property name when the method returns <see langword="true"/>; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if the expression is a member access on the record parameter.</returns>
/// <exception cref="InvalidOperationException">The member is accessed on the record parameter but has no entry in the storage property name lookup.</exception>
private bool TryTranslateFieldAccess(Expression expression, [NotNullWhen(true)] out string? storagePropertyName)
{
    // Anything other than <record parameter>.<member> is not a field access.
    if (expression is not MemberExpression memberExpression || memberExpression.Expression != this._recordParameter)
    {
        storagePropertyName = null;
        return false;
    }

    var memberName = memberExpression.Member.Name;

    if (this._storagePropertyNames.TryGetValue(memberName, out storagePropertyName))
    {
        return true;
    }

    throw new InvalidOperationException($"Property name '{memberName}' provided as part of the filter clause is not a valid property name.");
}
240+
241+
/// <summary>
/// Attempts to extract a constant value from an expression: either a literal
/// <see cref="ConstantExpression"/> or a field read on a compiler-generated closure instance
/// (i.e. a captured variable).
/// </summary>
/// <param name="expression">The expression to inspect.</param>
/// <param name="constantValue">The extracted value (which may itself be <see langword="null"/>) when the method returns <see langword="true"/>; otherwise <see langword="null"/>.</param>
/// <returns><see langword="true"/> if a value could be extracted; otherwise <see langword="false"/>.</returns>
private static bool TryGetConstant(Expression expression, out object? constantValue)
{
    switch (expression)
    {
        case ConstantExpression { Value: var v }:
            constantValue = v;
            return true;

        // This identifies compiler-generated closure types which contain captured variables.
        // Type.IsNestedPrivate compares the masked visibility bits for equality; Enum.HasFlag on
        // TypeAttributes.NestedPrivate would also accept other visibilities that share those bits.
        case MemberExpression { Expression: ConstantExpression constant, Member: FieldInfo fieldInfo }
            when constant.Type.IsNestedPrivate
                && Attribute.IsDefined(constant.Type, typeof(CompilerGeneratedAttribute), inherit: true):
            // Read the captured variable's current value from the closure instance.
            constantValue = fieldInfo.GetValue(constant.Value);
            return true;

        default:
            constantValue = null;
            return false;
    }
}
261+
262+
/// <summary>
/// Converts a CLR value to a <see cref="MetadataValue"/>, passing <see langword="null"/> through unchanged.
/// </summary>
/// <param name="value">The value to convert.</param>
/// <returns><see langword="null"/> when <paramref name="value"/> is <see langword="null"/>; otherwise the converted value.</returns>
private static MetadataValue? ToMetadata(object? value)
{
    if (value is null)
    {
        return null;
    }

    return PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(value);
}
264+
265+
/// <summary>
/// Returns the operand list of <paramref name="value"/> when it consists of exactly one entry keyed by
/// <paramref name="mongoOperator"/> whose value is a list.
/// </summary>
/// <param name="value">The filter to inspect.</param>
/// <param name="mongoOperator">The operator key to look for (e.g. <c>$and</c>, <c>$in</c>).</param>
/// <returns>The list stored under the operator, or <see langword="null"/> if the filter does not have that shape.</returns>
private static List<MetadataValue?>? GetListOrNull(Metadata value, string mongoOperator)
{
    if (value.Count != 1)
    {
        return null;
    }

    var entry = value.First();
    if (entry.Key != mongoOperator)
    {
        return null;
    }

    // 'as' yields null when the entry holds something other than a list.
    return entry.Value?.Value as List<MetadataValue?>;
}
267+
}

dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeGenericDataModelMapper.cs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,17 @@ public PineconeGenericDataModelMapper(
3434
/// <inheritdoc />
3535
public Vector MapFromDataToStorageModel(VectorStoreGenericDataModel<string> dataModel)
3636
{
37-
var metadata = new MetadataMap();
37+
var metadata = new Metadata();
3838

3939
// Map data properties.
4040
foreach (var dataProperty in this._propertyReader.DataProperties)
4141
{
4242
if (dataModel.Data.TryGetValue(dataProperty.DataModelPropertyName, out var propertyValue))
4343
{
4444
var propertyStorageName = this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName);
45-
metadata[propertyStorageName] = propertyValue == null ?
46-
new MetadataValue() :
47-
PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(propertyValue);
45+
metadata[propertyStorageName] = propertyValue is not null
46+
? PineconeVectorStoreRecordFieldMapping.ConvertToMetadataValue(propertyValue)
47+
: null;
4848
}
4949
}
5050

@@ -62,8 +62,8 @@ public Vector MapFromDataToStorageModel(VectorStoreGenericDataModel<string> data
6262
// TODO: what about sparse values?
6363
var result = new Vector
6464
{
65-
Id = (string)dataModel.Key,
66-
Values = values.ToArray(),
65+
Id = dataModel.Key,
66+
Values = values,
6767
Metadata = metadata,
6868
SparseValues = null
6969
};
@@ -80,7 +80,7 @@ public VectorStoreGenericDataModel<string> MapFromStorageToDataModel(Vector stor
8080
// Set Vector.
8181
if (options?.IncludeVectors is true)
8282
{
83-
dataModel.Vectors.Add(this._propertyReader.FirstVectorPropertyName!, new ReadOnlyMemory<float>(storageModel.Values));
83+
dataModel.Vectors.Add(this._propertyReader.FirstVectorPropertyName!, storageModel.Values);
8484
}
8585

8686
// Set Data.
@@ -91,9 +91,10 @@ public VectorStoreGenericDataModel<string> MapFromStorageToDataModel(Vector stor
9191
var propertyStorageName = this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName);
9292
if (storageModel.Metadata.TryGetValue(propertyStorageName, out var propertyValue))
9393
{
94-
dataModel.Data[dataProperty.DataModelPropertyName] = PineconeVectorStoreRecordFieldMapping.ConvertFromMetadataValueToNativeType(
95-
propertyValue,
96-
dataProperty.PropertyType);
94+
dataModel.Data[dataProperty.DataModelPropertyName] =
95+
propertyValue is not null
96+
? PineconeVectorStoreRecordFieldMapping.ConvertFromMetadataValueToNativeType(propertyValue, dataProperty.PropertyType)
97+
: null;
9798
}
9899
}
99100
}

0 commit comments

Comments
 (0)