Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Add short scripts used to fetch all currently opened issues #2288

Merged
merged 6 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pkg/scripts/issues/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
1. To use the script, generate access token here: https://github.com/settings/tokens?type=beta.
2. To get all open issues, invoke the [first script](./gh/main.go) with `SF_TF_SCRIPT_GH_ACCESS_TOKEN` set:
```shell
cd gh && SF_TF_SCRIPT_GH_ACCESS_TOKEN=<YOUR_PERSONAL_ACCESS_TOKEN> go run .
```
3. File `issues.json` should be generated in the `gh` directory. This is the input file for the second script.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why can't steps 2/3/4 be combined into one step? Getting and processing at same time.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason was that the data does not change between runs: The result of the first script is always the same. So this way, I was able to experiment on the static data that was faster and without locking my GH account because of the API limits they have.

4. To process the issues, invoke the [second script](./file/main.go):
```shell
cd file && go run .
```
5. The file `issues.csv` should be generated in the `file` directory. This CSV summarizes all the issues we have.
155 changes: 155 additions & 0 deletions pkg/scripts/issues/file/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
package main

import (
"encoding/csv"
"encoding/json"
"fmt"
"os"
"regexp"
"slices"
"strconv"
"strings"
"time"

i "github.com/Snowflake-Labs/terraform-provider-snowflake/pkg/scripts/issues"
)

func main() {
issues := loadIssues()
processedIssues := processIssues(issues)
saveCsv(processedIssues)
}

// loadIssues reads ../gh/issues.json (a path relative to the working
// directory) and decodes it into a slice of issues.
// It panics if the file cannot be read or is not valid JSON for []i.Issue.
func loadIssues() []i.Issue {
bytes, err := os.ReadFile("../gh/issues.json")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this meant to load issues coming from multiple sources? if so, it seems to be strongly dependent on the github issue interface

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, only from GH

if err != nil {
panic(err)
}
var issues []i.Issue
// The whole file is decoded in one go; err is reused for the decode step.
err = json.Unmarshal(bytes, &issues)
if err != nil {
panic(err)
}
return issues
}

// processIssues turns every raw issue into a ProcessedIssue, in input order.
// A progress line is printed to stdout for each issue. The returned slice is
// never nil, even when the input is empty.
func processIssues(issues []i.Issue) []ProcessedIssue {
	result := make([]ProcessedIssue, 0)
	for pos := range issues {
		issue := issues[pos]
		fmt.Printf("Processing issue (%d): %d\n", pos+1, issue.Number)

		// Flatten the label objects to their names; the names drive the
		// bug / feature-request flags below.
		labelNames := make([]string, 0)
		for _, l := range issue.Labels {
			labelNames = append(labelNames, l.Name)
		}

		version, minor := getProviderVersion(issue)

		var row ProcessedIssue
		row.ID = issue.Number
		row.URL = issue.HtmlUrl
		// Spreadsheet formula rendering the issue number as a clickable link.
		row.NamedURL = fmt.Sprintf(`=HYPERLINK("%s","#%d")`, issue.HtmlUrl, issue.Number)
		row.Title = issue.Title
		row.ProviderVersion = version
		row.ProviderVersionMinor = minor
		row.TerraformVersion = getTerraformVersion(issue)
		row.IsBug = slices.Contains(labelNames, "bug")
		row.IsFeatureRequest = slices.Contains(labelNames, "feature-request")
		row.CommentsCount = issue.Comments
		row.ReactionsCount = issue.Reactions.TotalCount
		row.CreatedAt = issue.CreatedAt
		row.Labels = labelNames

		result = append(result, row)
	}
	return result
}

// providerVersionSectionRe captures the text found between the
// "**Provider Version**" and "**Terraform Version**" headings of an issue body.
var providerVersionSectionRe = regexp.MustCompile(`\*\*Provider Version\*\*\s*([[:ascii:]]*)\s*\*\*Terraform Version\*\*`)

// providerVersionNumberRe matches a dotted version such as "0.70", "v0.70.1"
// or "1.2.3". Group 1 is the whole number, group 2 its second component.
// Every dot is escaped so that only a literal "." separates the components.
var providerVersionNumberRe = regexp.MustCompile(`v?\.?(\d+\.(\d+)(\.\d+)?)`)

/*
 * getProviderVersion extracts the provider version from the issue body.
 *
 * Only the older issue template is recognised; the version is expected where (...) is:
 * **Provider Version** (...) **Terraform Version**
 * The newer template
 * ### Terraform CLI and Provider Versions (...) ### Terraform Configuration
 * is not matched by this function, so such issues yield "NONE".
 *
 * Returns the full version (e.g. "0.70.1") and its second numeric component
 * (e.g. "70"). When the section or a version number inside it cannot be
 * found, it returns "NONE" and an empty string.
 */
func getProviderVersion(issue i.Issue) (string, string) {
	section := providerVersionSectionRe.FindStringSubmatch(issue.Body)
	if len(section) == 0 {
		return "NONE", ""
	}
	// section[1] is the free text the reporter typed between the two headings.
	version := providerVersionNumberRe.FindStringSubmatch(section[1])
	if len(version) == 0 {
		return "NONE", ""
	}
	return version[1], version[2]
}

/*
* getTerraformVersion extracts the Terraform version from the issue body.
* It returns the first dotted version number found in the matched section,
* or "NONE" when either the section or a version number is missing.
*
* For newer issues it should be where (...) are:
* ### Terraform CLI and Provider Versions (...) ### Terraform Configuration
* For older issues it should be where (...) are:
* **Terraform Version** (...) **Describe the bug**
*
* NOTE(review): only the older "**Terraform Version**" layout is matched by
* the regexp below; bodies using the newer layout fall through to "NONE".
* NOTE(review): in the version regexp the dot in `(.\d+)?` is not escaped, so
* any single character (not only ".") is accepted before the last component.
*/
func getTerraformVersion(issue i.Issue) string {
// Both regexps are compiled on every call.
oldRegex := regexp.MustCompile(`\*\*Terraform Version\*\*\s*([[:ascii:]]*)\s*\*\*Describe the bug\*\*`)
matches := oldRegex.FindStringSubmatch(issue.Body)
if len(matches) == 0 {
return "NONE"
} else {
// matches[1] is the text between the two headings.
versionRegex := regexp.MustCompile(`v?\.?(\d+\.(\d+)(.\d+)?)`)
vMatches := versionRegex.FindStringSubmatch(matches[1])
if len(vMatches) == 0 {
return "NONE"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of returning None, we could return the version of Terraform that was current at the time the issue was created.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was thinking about that: to keep a static list of releases with dates or to fetch them through API also. But this way we would get the latest possible version, which is not necessarily true. Also the regexps for extracting the version are not perfect. I guess this is the addition not needed now, because I already have a version for each issue.

} else {
// vMatches[1] is the full version number (first capture group).
return vMatches[1]
}
}
}

// saveCsv writes the processed issues to issues.csv in the working directory,
// one row per issue, using ';' as the field separator and no header row.
//
// Columns, in order: named URL (spreadsheet hyperlink formula), title,
// provider version, provider minor version, Terraform version, is-bug,
// is-feature-request, comments count, reactions count, creation date
// (date only) and the labels joined with '|'. ID and URL are deliberately
// left out because the named URL already carries both.
//
// It panics if the file cannot be created, written or closed.
func saveCsv(issues []ProcessedIssue) {
	file, err := os.Create("issues.csv")
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths below; after the explicit Close at the
	// end this second Close only returns an ignorable "already closed" error.
	defer file.Close()

	w := csv.NewWriter(file)
	w.Comma = ';'

	rows := make([][]string, 0, len(issues))
	for _, issue := range issues {
		rows = append(rows, []string{
			issue.NamedURL,
			issue.Title,
			issue.ProviderVersion,
			issue.ProviderVersionMinor,
			issue.TerraformVersion,
			strconv.FormatBool(issue.IsBug),
			strconv.FormatBool(issue.IsFeatureRequest),
			strconv.Itoa(issue.CommentsCount),
			strconv.Itoa(issue.ReactionsCount),
			issue.CreatedAt.Format(time.DateOnly),
			strings.Join(issue.Labels, "|"),
		})
	}

	// WriteAll flushes the writer and reports any write or flush error;
	// ignoring it could leave a truncated CSV behind without any notice.
	if err := w.WriteAll(rows); err != nil {
		panic(err)
	}
	// Close explicitly so that a failure to persist the data is not lost.
	if err := file.Close(); err != nil {
		panic(err)
	}
}

// ProcessedIssue is the condensed form of an issue that processIssues builds
// and saveCsv writes out as one CSV row.
type ProcessedIssue struct {
// ID is the issue number. It is not written to the CSV by saveCsv.
ID int
// URL is the issue's HTML URL. It is not written to the CSV by saveCsv.
URL string
// NamedURL is a spreadsheet HYPERLINK formula showing "#<number>" and linking to URL.
NamedURL string
// Title is the issue title.
Title string
// ProviderVersion is the provider version parsed from the body, or "NONE".
ProviderVersion string
// ProviderVersionMinor is the second component of ProviderVersion, or "" when none was found.
ProviderVersionMinor string
// TerraformVersion is the Terraform version parsed from the body, or "NONE".
TerraformVersion string
// IsBug reports whether the issue carries the "bug" label.
IsBug bool
// IsFeatureRequest reports whether the issue carries the "feature-request" label.
IsFeatureRequest bool
// CommentsCount is the number of comments on the issue.
CommentsCount int
// ReactionsCount is the total number of reactions on the issue.
ReactionsCount int
// CreatedAt is the creation time; saveCsv writes only its date part.
CreatedAt time.Time
// Labels holds the names of all labels attached to the issue.
Labels []string
}
102 changes: 102 additions & 0 deletions pkg/scripts/issues/gh/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package main

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os"
"strconv"
"time"

"github.com/Snowflake-Labs/terraform-provider-snowflake/pkg/scripts/issues"
)

func main() {
accessToken := getAccessToken()
issues := fetchAllIssues(accessToken)
saveIssues(issues)
}

// getAccessToken returns the GitHub token stored in the
// SF_TF_SCRIPT_GH_ACCESS_TOKEN environment variable.
// It panics when the variable is unset or empty.
func getAccessToken() string {
	if token := os.Getenv("SF_TF_SCRIPT_GH_ACCESS_TOKEN"); token != "" {
		return token
	}
	panic(errors.New("GitHub access token missing"))
}

// fetchAllIssues downloads every open issue of the repository page by page
// and returns them in the order received. Entries that carry a pull_request
// object are skipped, so only plain issues are returned.
//
// token is sent as the bearer token of each request. Paging stops at the
// first empty page. Between pages the function pauses to stay clear of the
// API rate limits. Any request or decoding failure panics (see invokeReq and
// getIssuesBatch).
func fetchAllIssues(token string) []issues.Issue {
	const (
		perPage        = 50
		requestTimeout = 30 * time.Second
		pauseBetween   = 5 * time.Second
	)

	// A client without a timeout can block forever on a stalled connection.
	client := &http.Client{Timeout: requestTimeout}
	allIssues := make([]issues.Issue, 0)

	for page := 1; ; page++ {
		fmt.Printf("Running batch %d\n", page)
		req := prepareRequest(perPage, page, token)
		batch := getIssuesBatch(invokeReq(client, req))
		if len(batch) == 0 {
			// Nothing left to fetch; return right away instead of sleeping
			// once more for no reason.
			break
		}
		for _, issue := range batch {
			if issue.PullRequest != nil {
				fmt.Printf("Skipping issue %d, it is a PR\n", issue.Number)
				continue
			}
			allIssues = append(allIssues, issue)
		}
		fmt.Printf("Sleeping for a moment...\n")
		time.Sleep(pauseBetween)
	}
	return allIssues
}

// prepareRequest builds the GET request for one page of open issues.
// perPage and page become the per_page and page query parameters, and token
// is sent as a bearer token in the Authorization header. The final URL is
// printed to stdout. It panics if the request cannot be constructed.
func prepareRequest(perPage int, page int, token string) *http.Request {
	const endpoint = "https://api.github.com/repos/Snowflake-Labs/terraform-provider-snowflake/issues"

	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		panic(err)
	}

	query := req.URL.Query()
	for _, param := range [][2]string{
		{"per_page", strconv.Itoa(perPage)},
		{"page", strconv.Itoa(page)},
		{"state", "open"},
	} {
		query.Add(param[0], param[1])
	}
	req.URL.RawQuery = query.Encode()

	req.Header.Add("Accept", "application/vnd.github+json")
	req.Header.Add("Authorization", "Bearer "+token)

	fmt.Printf("Prepared URL: %s\n", req.URL.String())
	return req
}

// invokeReq sends req with client and returns the raw response body.
//
// It panics when the request fails, when the body cannot be read, or when
// the server answers with a non-2xx status. Without the status check an
// error payload (for example on bad credentials or rate limiting) would be
// handed to the JSON decoder and surface as a misleading unmarshal error.
func invokeReq(client *http.Client, req *http.Request) []byte {
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Read the body before checking the status so that it can be included
	// in the error message.
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		panic(fmt.Errorf("unexpected HTTP status %d for %s: %s", resp.StatusCode, req.URL, bodyBytes))
	}
	return bodyBytes
}

// getIssuesBatch decodes one page of the API response into a slice of
// issues. It panics when bytes is not valid JSON for []issues.Issue.
func getIssuesBatch(bytes []byte) []issues.Issue {
	var batch []issues.Issue
	if err := json.Unmarshal(bytes, &batch); err != nil {
		panic(err)
	}
	return batch
}

// saveIssues serialises the issues as JSON into issues.json in the working
// directory, readable and writable by the owner only (mode 0600).
// It panics if encoding or writing fails, so a failed run cannot be mistaken
// for a successful one with a missing or stale file.
func saveIssues(issues []issues.Issue) {
	bytes, err := json.Marshal(issues)
	if err != nil {
		panic(err)
	}
	if err := os.WriteFile("issues.json", bytes, 0o600); err != nil {
		panic(err)
	}
}
30 changes: 30 additions & 0 deletions pkg/scripts/issues/model.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package issues

import "time"

// Issue holds the fields of an issue JSON object that the scripts use.
// Each field is decoded from, and encoded to, the key named in its json tag.
type Issue struct {
// HtmlUrl is the value of "html_url".
HtmlUrl string `json:"html_url"`
// Number is the issue number.
Number int `json:"number"`
// Title is the issue title.
Title string `json:"title"`
// Labels lists the labels attached to the issue.
Labels []Label `json:"labels"`
// State is the value of "state".
State string `json:"state"`
// Comments is the value of "comments"; the CSV script uses it as the comment count.
Comments int `json:"comments"`
// CreatedAt is the value of "created_at".
CreatedAt time.Time `json:"created_at"`
// UpdatedAt is the value of "updated_at".
UpdatedAt time.Time `json:"updated_at"`
// Body is the issue text; the version numbers are parsed out of it.
Body string `json:"body"`
// Reactions carries the reaction summary of the issue.
Reactions Reactions `json:"reactions"`
// PullRequest stays nil when the JSON has no "pull_request" object;
// the fetch script treats a non-nil value as "this entry is a PR".
PullRequest *PullRequest `json:"pull_request"`
}

// Label is a single label attached to an issue.
type Label struct {
// Url is the value of the label's "url" key.
Url string `json:"url"`
// Name is the label name, e.g. "bug" or "feature-request".
Name string `json:"name"`
}

// Reactions is the reaction summary of an issue; only the total is kept.
type Reactions struct {
// TotalCount is the value of "total_count".
TotalCount int `json:"total_count"`
}

// PullRequest mirrors the "pull_request" object of an issue entry.
// The fetch script only checks for its presence to filter out pull requests.
type PullRequest struct {
// HtmlUrl is the value of the object's "html_url" key.
HtmlUrl string `json:"html_url"`
}
Loading