Skip to content

Commit

Permalink
feat: print summary when the crawling is done (#312)
Browse files Browse the repository at this point in the history
See #307.
  • Loading branch information
bfabio committed Nov 18, 2022
1 parent ea0cb5c commit ba9fe19
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 26 deletions.
85 changes: 59 additions & 26 deletions crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,21 @@ func NewCrawler(dryRun bool) *Crawler {

// Register Prometheus metrics.
metrics.RegisterPrometheusCounter("repository_processed", "Number of repository processed.", c.Index)
metrics.RegisterPrometheusCounter("repository_file_saved", "Number of file saved.", c.Index)
metrics.RegisterPrometheusCounter("repository_file_indexed", "Number of file indexed.", c.Index)
metrics.RegisterPrometheusCounter(
"repository_good_publiccodeyml", "Number of valid publiccode.yml files in the processed repos.",
c.Index,
)
metrics.RegisterPrometheusCounter(
"repository_bad_publiccodeyml", "Number of invalid publiccode.yml files in the processed repos.",
c.Index,
)
metrics.RegisterPrometheusCounter("repository_cloned", "Number of repository cloned", c.Index)
//metrics.RegisterPrometheusCounter("repository_file_saved_valid", "Number of valid file saved.", c.Index)
metrics.RegisterPrometheusCounter("repository_new", "Number of new repositories", c.Index)
metrics.RegisterPrometheusCounter("repository_known", "Number of already known repositories", c.Index)
metrics.RegisterPrometheusCounter(
"repository_upsert_failures", "Number of failures in creating or updating software in the API",
c.Index,
)

c.gitHubScanner = scanner.NewGitHubScanner()
c.gitLabScanner = scanner.NewGitLabScanner()
Expand Down Expand Up @@ -150,6 +161,17 @@ func (c *Crawler) crawl() error {
close(reposChan)
c.repositoriesWg.Wait()

log.Infof(
"Summary: Total repos scanned: %v. With good publiccode.yml file: %v. With bad publiccode.yml file: %v\n"+
"Repos with good publiccode.yml file: New repos: %v, Known repos: %v, Failures saving to API: %v",
metrics.GetCounterValue("repository_processed", c.Index),
metrics.GetCounterValue("repository_good_publiccodeyml", c.Index),
metrics.GetCounterValue("repository_bad_publiccodeyml", c.Index),
metrics.GetCounterValue("repository_new", c.Index),
metrics.GetCounterValue("repository_known", c.Index),
metrics.GetCounterValue("repository_upsert_failures", c.Index),
)

return nil
}

Expand Down Expand Up @@ -286,6 +308,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) {
parser, err = publiccode.NewParser(repository.FileRawURL)
if err != nil {
logEntries = append(logEntries,fmt.Sprintf("[%s] BAD publiccode.yml: %s\n", repository.Name, err.Error()))
metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc()

return
}
Expand All @@ -311,6 +334,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) {

if !valid {
logEntries = append(logEntries, fmt.Sprintf("[%s] BAD publiccode.yml: %+v\n", repository.Name, err))
metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc()

return
}
Expand All @@ -323,36 +347,17 @@ func (c *Crawler) ProcessRepo(repository common.Repository) {
err = validateFile(repository.Publisher, *parser, repository.FileRawURL)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] BAD publiccode.yml: %+v\n", repository.Name, err))
metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc()

return
}
}

logEntries = append(logEntries, fmt.Sprintf("[%s] GOOD publiccode.yml\n", repository.Name))
metrics.GetCounter("repository_good_publiccodeyml", c.Index).Inc()

if c.DryRun {
log.Infof("[%s]: Skipping other steps (--dry-run)", repository.Name)
return
}

if !viper.GetBool("SKIP_VITALITY") {
// Clone repository.
err = git.CloneRepository(repository.URL.Host, repository.Name, parser.PublicCode.URL.String(), c.Index)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] error while cloning: %v\n", repository.Name, err))
}

// Calculate Repository activity index and vitality. Defaults to 60 days.
var activityDays int = 60
if viper.IsSet("ACTIVITY_DAYS") {
activityDays = viper.GetInt("ACTIVITY_DAYS")
}
activityIndex, _, err := git.CalculateRepoActivity(repository, activityDays)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] error calculating activity index: %v\n", repository.Name, err))
} else {
logEntries = append(logEntries, fmt.Sprintf("[%s] activity index in the last %d days: %f\n", repository.Name, activityDays, activityIndex))
}
}

var aliases []string
Expand All @@ -373,19 +378,47 @@ func (c *Crawler) ProcessRepo(repository common.Repository) {
}

if software == nil {
_, err = c.apiClient.PostSoftware(url, aliases, string(publiccodeYml))
metrics.GetCounter("repository_new", c.Index).Inc()
if !c.DryRun {
_, err = c.apiClient.PostSoftware(url, aliases, string(publiccodeYml))
}
} else {
for _, alias := range software.Aliases {
if !slices.Contains(aliases, alias) {
aliases = append(aliases, alias)
}
}

_, err = c.apiClient.PatchSoftware(software.ID, url, aliases, string(publiccodeYml))
metrics.GetCounter("repository_known", c.Index).Inc()
if !c.DryRun {
_, err = c.apiClient.PatchSoftware(software.ID, url, aliases, string(publiccodeYml))
}
}
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s]: %s", repository.Name, err.Error()))
metrics.GetCounter("repository_upsert_failures", c.Index).Inc()
}

if !viper.GetBool("SKIP_VITALITY") && !c.DryRun {
// Clone repository.
err = git.CloneRepository(repository.URL.Host, repository.Name, parser.PublicCode.URL.String(), c.Index)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] error while cloning: %v\n", repository.Name, err))
}

// Calculate Repository activity index and vitality. Defaults to 60 days.
var activityDays int = 60
if viper.IsSet("ACTIVITY_DAYS") {
activityDays = viper.GetInt("ACTIVITY_DAYS")
}
activityIndex, _, err := git.CalculateRepoActivity(repository, activityDays)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] error calculating activity index: %v\n", repository.Name, err))
} else {
logEntries = append(logEntries, fmt.Sprintf("[%s] activity index in the last %d days: %f\n", repository.Name, activityDays, activityIndex))
}
}

}

// validateFile checks if it.riuso.codiceIPA in the publiccode.yml matches with the
Expand Down
12 changes: 12 additions & 0 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"regexp"

"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
)
Expand All @@ -30,6 +31,17 @@ func GetCounter(name, namespace string) prometheus.Counter {
return registeredCounters[name]
}

func GetCounterValue(name, namespace string) float64 {
var m = &dto.Metric{}

if err := GetCounter(name, namespace).Write(m); err != nil {
log.Error(err)
return 0
}

return m.Counter.GetValue()
}

// RegisterPrometheusCounter registers a new Counter with the given name and help text.
func RegisterPrometheusCounter(name, helpText, namespace string) {
// Validate and fix name (replace invalid chars with underscore "_").
Expand Down

0 comments on commit ba9fe19

Please sign in to comment.