Skip to content

Commit

Permalink
Add OwnerSetComparer which detects ownership changes (#499)
Browse files Browse the repository at this point in the history
Progress on #6475
  • Loading branch information
joelverhagen committed Apr 10, 2019
1 parent 9ef501f commit febd5ac
Show file tree
Hide file tree
Showing 6 changed files with 276 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ public static IServiceCollection AddAzureSearch(this IServiceCollection services
services.AddTransient<IIndexBuilder, IndexBuilder>();
services.AddTransient<INewPackageRegistrationProducer, NewPackageRegistrationProducer>();
services.AddTransient<IOwnerDataClient, OwnerDataClient>();
services.AddTransient<IOwnerSetComparer, OwnerSetComparer>();
services.AddTransient<IPackageEntityIndexActionBuilder, PackageEntityIndexActionBuilder>();
services.AddTransient<IRegistrationClient, RegistrationClient>();
services.AddTransient<ISearchDocumentBuilder, SearchDocumentBuilder>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
<Compile Include="Analysis\IdentifierCustomTokenFilter.cs" />
<Compile Include="Analysis\PackageIdCustomAnalyzer.cs" />
<Compile Include="Analysis\PackageIdCustomTokenizer.cs" />
<Compile Include="Owners2AzureSearch\IOwnerSetComparer.cs" />
<Compile Include="Owners2AzureSearch\OwnerSetComparer.cs" />
<Compile Include="Owners2AzureSearch\PackageIdToOwnersBuilder.cs" />
<Compile Include="Owners2AzureSearch\IOwnerDataClient.cs" />
<Compile Include="Owners2AzureSearch\OwnerDataClient.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Collections.Generic;

namespace NuGet.Services.AzureSearch.Owners2AzureSearch
{
public interface IOwnerSetComparer
{
SortedDictionary<string, string[]> Compare(
SortedDictionary<string, SortedSet<string>> oldData,
SortedDictionary<string, SortedSet<string>> newData);
}
}
101 changes: 101 additions & 0 deletions src/NuGet.Services.AzureSearch/Owners2AzureSearch/OwnerSetComparer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.Logging;

namespace NuGet.Services.AzureSearch.Owners2AzureSearch
{
public class OwnerSetComparer : IOwnerSetComparer
{
private static readonly string[] EmptyStringArray = new string[0];

private readonly ILogger _logger;

public OwnerSetComparer(ILogger logger)
{
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
}

public SortedDictionary<string, string[]> Compare(
SortedDictionary<string, SortedSet<string>> oldData,
SortedDictionary<string, SortedSet<string>> newData)
{
if (oldData.Comparer != StringComparer.OrdinalIgnoreCase)
{
throw new ArgumentException("The old data should have a case-insensitive comparer.", nameof(oldData));
}

if (newData.Comparer != StringComparer.OrdinalIgnoreCase)
{
throw new ArgumentException("The new data should have a case-insensitive comparer.", nameof(newData));
}

// We use a very simplistic algorithm here. Perform one pass on the new data to find the added or changed
// package IDs. Then perform a second pass on the old data to find removed package IDs. We can optimize
// this later if necessary.
//
// We emit all of the usernames when a package ID's owners have changed instead of the delta. This is
// because Azure Search does not have a way to append or remove a specific item from a field that is an
// array. The entire new array needs to be provided.
var result = new SortedDictionary<string, string[]>(StringComparer.OrdinalIgnoreCase);

// First pass: find added or changed sets.
foreach (var pair in newData)
{
var id = pair.Key;
var newOwners = pair.Value;
if (!oldData.TryGetValue(id, out var oldOwners))
{
// ADDED: The package ID does not exist in the old data, which means the package ID was added.
result.Add(id, newOwners.ToArray());
_logger.LogInformation(
"The package ID {ID} has been added, with {AddedCount} owners.",
id,
newOwners.Count);
}
else
{
// The package ID exists in the old data. We need to check if the owner set has changed. Perform
// an ordinal comparison to allow username case changes to flow through.
var removedUsernames = oldOwners.Except(newOwners, StringComparer.Ordinal).ToList();
var addedUsernames = newOwners.Except(oldOwners, StringComparer.Ordinal).ToList();

if (removedUsernames.Any() || addedUsernames.Any())
{
// CHANGED: The username set has changed.
result.Add(id, newOwners.ToArray());
_logger.LogInformation(
"The package ID {ID} has an ownership change, with {RemovedCount} owners removed and " +
"{AddedCount} owners added.",
id,
removedUsernames.Count,
addedUsernames.Count);
}
}
}

// Second pass: find removed sets.
foreach (var pair in oldData)
{
var id = pair.Key;
var oldOwners = pair.Value;

if (!newData.TryGetValue(id, out var newOwners))
{
// REMOVED: The package ID does not exist in the new data, which means the package ID was removed.
result.Add(id, EmptyStringArray);
_logger.LogInformation(
"The package ID {ID} has been removed, with {RemovedCount} owners",
id,
oldOwners.Count);
}
}

return result;
}
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
<Compile Include="IndexBuilderFacts.cs" />
<Compile Include="Models\CommittedDocumentFacts.cs" />
<Compile Include="Owners2AzureSearch\OwnerDataClientFacts.cs" />
<Compile Include="Owners2AzureSearch\OwnerSetComparerFacts.cs" />
<Compile Include="Registration\RegistrationUrlBuilderFacts.cs" />
<Compile Include="SearchDocumentBuilderFacts.cs" />
<Compile Include="Registration\RegistrationClientFacts.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Collections.Generic;
using System.Linq;
using NuGet.Services.AzureSearch.Support;
using Xunit;
using Xunit.Abstractions;

namespace NuGet.Services.AzureSearch.Owners2AzureSearch
{
public class OwnerSetComparerFacts
{
public class Compare : Facts
{
public Compare(ITestOutputHelper output) : base(output)
{
}

[Fact]
public void FindsAddedPackageIds()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft");
var newData = Data("NuGet.Core: NuGet, Microsoft",
"NuGet.Versioning: NuGet, Microsoft");

var changes = Target.Compare(oldData, newData);

var pair = Assert.Single(changes);
Assert.Equal("NuGet.Versioning", pair.Key);
Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value);
}


[Fact]
public void FindsRemovedPackageIds()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft",
"NuGet.Versioning: NuGet, Microsoft");
var newData = Data("NuGet.Core: NuGet, Microsoft");

var changes = Target.Compare(oldData, newData);

var pair = Assert.Single(changes);
Assert.Equal("NuGet.Versioning", pair.Key);
Assert.Empty(pair.Value);
}

[Fact]
public void FindsAddedOwner()
{
var oldData = Data("NuGet.Core: NuGet");
var newData = Data("NuGet.Core: NuGet, Microsoft");

var changes = Target.Compare(oldData, newData);

var pair = Assert.Single(changes);
Assert.Equal("NuGet.Core", pair.Key);
Assert.Equal(new[] { "Microsoft", "NuGet" }, pair.Value);
}

[Fact]
public void FindsRemovedOwner()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft");
var newData = Data("NuGet.Core: NuGet");

var changes = Target.Compare(oldData, newData);

var pair = Assert.Single(changes);
Assert.Equal("NuGet.Core", pair.Key);
Assert.Equal(new[] { "NuGet" }, pair.Value);
}

[Fact]
public void FindsOwnerWithChangedCase()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft");
var newData = Data("NuGet.Core: NuGet, microsoft");

var changes = Target.Compare(oldData, newData);

var pair = Assert.Single(changes);
Assert.Equal("NuGet.Core", pair.Key);
Assert.Equal(new[] { "microsoft", "NuGet" }, pair.Value);
}

[Fact]
public void FindsManyChangesAtOnce()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft",
"NuGet.Frameworks: NuGet",
"NuGet.Protocol: NuGet");
var newData = Data("NuGet.Core: NuGet, microsoft",
"NuGet.Versioning: NuGet",
"NuGet.Protocol: NuGet");

var changes = Target.Compare(oldData, newData);

Assert.Equal(3, changes.Count);
Assert.Equal(new[] { "NuGet.Core", "NuGet.Frameworks", "NuGet.Versioning" }, changes.Keys.ToArray());
Assert.Equal(new[] { "microsoft", "NuGet" }, changes["NuGet.Core"]);
Assert.Empty(changes["NuGet.Frameworks"]);
Assert.Equal(new[] { "NuGet" }, changes["NuGet.Versioning"]);
}

[Fact]
public void FindsNoChanges()
{
var oldData = Data("NuGet.Core: NuGet, Microsoft",
"NuGet.Versioning: NuGet, Microsoft");
var newData = Data("NuGet.Core: NuGet, Microsoft",
"NuGet.Versioning: NuGet, Microsoft");

var changes = Target.Compare(oldData, newData);

Assert.Empty(changes);
}
}

public abstract class Facts
{
public Facts(ITestOutputHelper output)
{
Logger = output.GetLogger<OwnerSetComparer>();

Target = new OwnerSetComparer(Logger);
}

public RecordingLogger<OwnerSetComparer> Logger { get; }
public OwnerSetComparer Target { get; }

/// <summary>
/// A helper to turn lines formatted like this "PackageId: OwnerA, OwnerB" into package ID to owners
/// dictionary.
/// </summary>
public SortedDictionary<string, SortedSet<string>> Data(params string[] lines)
{
var builder = new PackageIdToOwnersBuilder(Logger);
foreach (var line in lines)
{
var pieces = line.Split(new[] { ':' }, 2);
var id = pieces[0].Trim();
var usernames = pieces[1]
.Split(',')
.Select(x => x.Trim())
.Where(x => x.Length > 0)
.ToList();

builder.Add(id, usernames);
}

return builder.GetResult();
}
}
}
}

0 comments on commit febd5ac

Please sign in to comment.