diff --git a/parsers/guesser.go b/parsers/guesser.go
index 176a885..9965b4d 100644
--- a/parsers/guesser.go
+++ b/parsers/guesser.go
@@ -7,6 +7,7 @@ import (
 	vectorspace "github.com/boyter/golangvectorspace"
 	"github.com/briandowns/spinner"
 	"io/ioutil"
+	"math"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -124,6 +125,9 @@ func guessLicense(content string, deepguess bool, licenses []License) []LicenseM
 		contentConcordance := vectorspace.BuildConcordance(string(runecontent[:trimto]))
 		relation := vectorspace.Relation(matchingLicense.Concordance, contentConcordance)
 
+		// Average the vector-space relation with the keyword percentage term (license.Percentage/100 + 0.5, capped at 1)
+		relation = (relation + math.Min(1, (license.Percentage/100)+0.5)) / 2
+
 		if relation >= confidence {
 			matchingLicenses = append(matchingLicenses, LicenseMatch{LicenseId: license.LicenseId, Percentage: relation})
 		}
diff --git a/parsers/guesser_test.go b/parsers/guesser_test.go
index 2aca90f..3153354 100644
--- a/parsers/guesser_test.go
+++ b/parsers/guesser_test.go
@@ -153,7 +153,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
 
 	result := guessLicense(content, deepGuess, loadDatabase())
 
-	t.Log(result)
 	if result[0].LicenseId != "MIT" {
 		t.Errorf("Should be MIT")
 	}
@@ -182,12 +181,199 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.`
 
 	result = guessLicense(content, deepGuess, loadDatabase())
 
-	t.Log(result)
 	if result[0].LicenseId != "MIT" {
 		t.Errorf("Should be MIT")
 	}
 }
 
+// TestRegressionIssue41 feeds a BSD-style notice with a numbered "name of the author" clause to guessLicense and expects the top match to be BSD-2-Clause.
+func TestRegressionIssue41(t *testing.T) {
+	content := `Copyright (C) 2010-2014 Jonas Borgström
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ 3. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior
+ written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
+
+	result := guessLicense(content, deepGuess, loadDatabase())
+
+	if len(result) == 0 {
+		t.Fatal("Should be BSD-2-Clause but no licenses were matched")
+	}
+	if result[0].LicenseId != "BSD-2-Clause" {
+		t.Errorf("Should be BSD-2-Clause was %s", result[0].LicenseId)
+	}
+}
+
+// TestRegressionIssue40 feeds a two-clause BSD notice preceded by several copyright lines to guessLicense and expects the top match to be BSD-2-Clause.
+func TestRegressionIssue40(t *testing.T) {
+	// Idea: trimming the leading copyright lines out of the text may help the match
+	content := `Copyright (c) 2014-2016 Lazaros Koromilas
+Copyright (c) 2014-2016 Dimitris Papastamos
+Copyright (c) 2016-2018 Arun Prakash Jana
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
+
+	result := guessLicense(content, deepGuess, loadDatabase())
+
+	if len(result) == 0 {
+		t.Fatal("Should be BSD-2-Clause but no licenses were matched")
+	}
+	if result[0].LicenseId != "BSD-2-Clause" {
+		t.Errorf("Should be BSD-2-Clause was %s", result[0].LicenseId)
+	}
+}
+
+// TestRegressionIssue39 feeds a BSD-style notice followed by a GPL notice for mksignames.c to guessLicense and expects the top match to be BSD-2-Clause.
+func TestRegressionIssue39(t *testing.T) {
+	content := `Copyright (c) 1989-1994
+ The Regents of the University of California. All rights reserved.
+Copyright (c) 1997 Christos Zoulas. All rights reserved.
+Copyright (c) 1997-2005
+ Herbert Xu . All rights reserved.
+
+This code is derived from software contributed to Berkeley by Kenneth Almquist.
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+mksignames.c:
+
+This file is not directly linked with dash. However, its output is.
+
+Copyright (C) 1992 Free Software Foundation, Inc.
+
+This file is part of GNU Bash, the Bourne Again SHell.
+
+Bash is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Bash is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License with
+your Debian GNU/Linux system, in /usr/share/common-licenses/GPL, or with the
+Debian GNU/Linux hello source package as the file COPYING. If not,
+write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+Boston, MA 02111 USA.`
+
+	result := guessLicense(content, deepGuess, loadDatabase())
+
+	if len(result) == 0 {
+		t.Fatal("Should be BSD-2-Clause but no licenses were matched")
+	}
+	if result[0].LicenseId != "BSD-2-Clause" {
+		t.Errorf("Should be BSD-2-Clause was %s", result[0].LicenseId)
+	}
+}
+
+// TestRegressionIssue38 feeds a two-clause BSD notice with dash-bulleted clauses to guessLicense and expects the top match to be BSD-2-Clause.
+func TestRegressionIssue38(t *testing.T) {
+	// Idea: trimming the leading copyright lines out of the text works for this case
+	content := `Copyright 2001-2009 Jean-Marc Valin, Timothy B. Terriberry,
+ CSIRO, and other contributors
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
+
+	result := guessLicense(content, deepGuess, loadDatabase())
+
+	if len(result) == 0 {
+		t.Fatal("Should be BSD-2-Clause but no licenses were matched")
+	}
+	if result[0].LicenseId != "BSD-2-Clause" {
+		t.Errorf("Should be BSD-2-Clause was %s", result[0].LicenseId)
+	}
+}
+
 func TestIdentifierGuessLicence(t *testing.T) {
 	actual := identifierGuessLicence("test", loadDatabase())
 	if len(actual) != 0 {