From 3aa409661d27cb4fea576489d59c055bae85dbc7 Mon Sep 17 00:00:00 2001 From: Oliver Bristow Date: Sun, 5 Apr 2020 16:39:23 +0100 Subject: [PATCH] GH-355: Handle duplicates in --people-dict and extend docs Signed-off-by: Oliver Bristow --- README.md | 9 ++++++++- internal/plumbing/identity/identity.go | 13 ++++++++++--- internal/plumbing/identity/identity_test.go | 19 ++++++++++++++++--- internal/test_data/identities | 4 +++- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 96c4c3a2..91b2484d 100644 --- a/README.md +++ b/README.md @@ -223,7 +223,14 @@ and add the unknown email to the list of that developer's emails. If `--people-dict` is specified, it should point to a text file with the custom identities. The format is: every line is a single developer, it contains all the matching emails and names separated -by `|`. The case is ignored. +by `|`. The case is ignored. Example file contents: +``` +Linus Torvalds|torvalds@linux-foundation.org +Vadim Markovtsev|vadim@sourced.tech|another@one.com +``` + +If `--people-dict` is not specified, a [`.mailmap`](https://git-scm.com/docs/git-check-mailmap) file +will be used if it exists in the latest commit. 
#### Overwrites matrix diff --git a/internal/plumbing/identity/identity.go b/internal/plumbing/identity/identity.go index 9095352d..927e72d1 100644 --- a/internal/plumbing/identity/identity.go +++ b/internal/plumbing/identity/identity.go @@ -186,11 +186,18 @@ func (detector *Detector) LoadPeopleDict(path string) error { size := 0 for scanner.Scan() { ids := strings.Split(scanner.Text(), "|") + canon := ids[0] + var exists bool + var canonIndex int + // look up the canonical name (first field); if it is new, append it to reverseDict and use its position there (the current size) as the index shared by all of this line's aliases + if canonIndex, exists = dict[strings.ToLower(canon)]; !exists { + reverseDict = append(reverseDict, canon) + canonIndex = size + size++ + } for _, id := range ids { - dict[strings.ToLower(id)] = size + dict[strings.ToLower(id)] = canonIndex } - reverseDict = append(reverseDict, ids[0]) - size++ } reverseDict = append(reverseDict, AuthorMissingName) detector.PeopleDict = dict diff --git a/internal/plumbing/identity/identity_test.go b/internal/plumbing/identity/identity_test.go index 716447b4..a79ff331 100644 --- a/internal/plumbing/identity/identity_test.go +++ b/internal/plumbing/identity/identity_test.go @@ -177,7 +177,7 @@ func TestIdentityDetectorLoadPeopleDict(t *testing.T) { id := fixtureIdentityDetector() err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities")) assert.Nil(t, err) - assert.Equal(t, len(id.PeopleDict), 7) + assert.Equal(t, len(id.PeopleDict), 10) assert.Contains(t, id.PeopleDict, "linus torvalds") assert.Contains(t, id.PeopleDict, "torvalds@linux-foundation.org") assert.Contains(t, id.PeopleDict, "vadim markovtsev") @@ -185,11 +185,24 @@ func TestIdentityDetectorLoadPeopleDict(t *testing.T) { assert.Contains(t, id.PeopleDict, "another@one.com") assert.Contains(t, id.PeopleDict, "máximo cuadros") assert.Contains(t, id.PeopleDict, "maximo@sourced.tech") - assert.Equal(t, len(id.ReversedPeopleDict), 4) + assert.Contains(t, id.PeopleDict, "duplicate") + assert.Contains(t, id.PeopleDict, "first@example.com") + assert.Contains(t, id.PeopleDict, 
"second@example.com") + + assert.Equal(t, len(id.ReversedPeopleDict), 5) assert.Equal(t, id.ReversedPeopleDict[0], "Linus Torvalds") assert.Equal(t, id.ReversedPeopleDict[1], "Vadim Markovtsev") assert.Equal(t, id.ReversedPeopleDict[2], "Máximo Cuadros") - assert.Equal(t, id.ReversedPeopleDict[3], AuthorMissingName) + assert.Equal(t, id.ReversedPeopleDict[3], "Duplicate") + assert.Equal(t, id.ReversedPeopleDict[4], AuthorMissingName) + + assert.Equal(t, id.PeopleDict["duplicate"], id.PeopleDict["first@example.com"]) + assert.Equal(t, id.PeopleDict["duplicate"], id.PeopleDict["second@example.com"]) + + assert.Equal(t, id.PeopleDict["vadim markovtsev"], id.PeopleDict["vadim@sourced.tech"]) + assert.Equal(t, id.PeopleDict["vadim markovtsev"], id.PeopleDict["another@one.com"]) + + assert.NotEqual(t, id.PeopleDict["duplicate"], id.PeopleDict["vadim markovtsev"]) } func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) { diff --git a/internal/test_data/identities b/internal/test_data/identities index 0332510f..65651cde 100644 --- a/internal/test_data/identities +++ b/internal/test_data/identities @@ -1,3 +1,5 @@ Linus Torvalds|torvalds@linux-foundation.org Vadim Markovtsev|vadim@sourced.tech|another@one.com -Máximo Cuadros|maximo@sourced.tech \ No newline at end of file +Máximo Cuadros|maximo@sourced.tech +Duplicate|first@example.com +Duplicate|second@example.com