From 00ccf89dd138eb9c021d545e5ff6a565c5c602f8 Mon Sep 17 00:00:00 2001
From: Tony Kappen <32112423+mrKappen@users.noreply.github.com>
Date: Thu, 24 Sep 2020 17:21:00 -0400
Subject: [PATCH] WIP: test stale repositories (#3210)

* fix typo in README.md fixes #3204

* #1446 implement test for stale repositories

* fix #1446

* fixes #3211 added check if issue has not been previously opened

* fixes #3211 add limit to number of issues created at a time

* fixes #3211 reformat issue message

* checks for dead links as well

* fixes #3211 handle status code 302 and 301

* fixes #3211 handle status code 302 and 301

* fixes #3211 handle status code 302 and 301

* fixes #3211 test workflow

* fixes #3211 test workflow

* fixes #3211 test workflow again

* fixes #3211 test workflow again

* remove workflows and start over

* re add workflow

* apply review suggestions

* add environment variable. modify workflow to run once a week

* add check for archived repositories and reformat

* reformat code to improve readability

* reformat to improve readability

* cause continue and not break if href not found

* satisfy code climate requirements
---
 .github/workflows/run-check.yaml |  20 ++
 .gitignore                       |   4 +-
 repo_test.go                     |  22 --
 scripts.go                       |  29 +++
 test_stale_repositories.go       | 355 +++++++++++++++++++++++++++++++
 5 files changed, 407 insertions(+), 23 deletions(-)
 create mode 100644 .github/workflows/run-check.yaml
 create mode 100644 scripts.go
 create mode 100644 test_stale_repositories.go

diff --git a/.github/workflows/run-check.yaml b/.github/workflows/run-check.yaml
new file mode 100644
index 00000000000..7a758cb87ab
--- /dev/null
+++ b/.github/workflows/run-check.yaml
@@ -0,0 +1,20 @@
+name: Check For Stale Repositories
+on:
+  schedule:
+    - cron: '0 0 * * 0'
+jobs:
+  build:
+    name: Running test
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        go: ['1.15.x']
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-go@v2
+        with:
+          go-version: ${{ matrix.go }}
+      - shell: bash
+        env:
+          OAUTH_TOKEN: ${{secrets.OAUTH_TOKEN}}
+        run: go get -t -v ./... && go run test_stale_repositories.go scripts.go
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 382fc505e6b..19305f8caac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,6 @@ awesome-go
 
 # Folders
 .idea
-.vscode
\ No newline at end of file
+.vscode
+test_stale_repositories_log
+*.exe
\ No newline at end of file
diff --git a/repo_test.go b/repo_test.go
index 19ac5d4aa2b..9b694fa1c93 100644
--- a/repo_test.go
+++ b/repo_test.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"bytes"
 	"io/ioutil"
 	"os"
 	"regexp"
@@ -11,7 +10,6 @@ import (
 	"text/template"
 
 	"github.com/PuerkitoBio/goquery"
-	"github.com/russross/blackfriday"
 	gfm "github.com/shurcooL/github_flavored_markdown"
 )
 
@@ -97,26 +95,6 @@ func testList(t *testing.T, list *goquery.Selection) {
 	})
 }
 
-func readme() []byte {
-	input, err := ioutil.ReadFile("./README.md")
-	if err != nil {
-		panic(err)
-	}
-	html := append([]byte(""), blackfriday.MarkdownCommon(input)...)
-	html = append(html, []byte("")...)
-	return html
-}
-
-func startQuery() *goquery.Document {
-	buf := bytes.NewBuffer(readme())
-	query, err := goquery.NewDocumentFromReader(buf)
-	if err != nil {
-		panic(err)
-	}
-
-	return query
-}
-
 func checkAlphabeticOrder(t *testing.T, s *goquery.Selection) {
 	items := s.Find("li > a:first-child").Map(func(_ int, li *goquery.Selection) string {
 		return strings.ToLower(li.Text())
diff --git a/scripts.go b/scripts.go
new file mode 100644
index 00000000000..1d0a260cb71
--- /dev/null
+++ b/scripts.go
@@ -0,0 +1,29 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/russross/blackfriday"
+)
+
+func readme() []byte {
+	input, err := ioutil.ReadFile("./README.md")
+	if err != nil {
+		panic(err)
+	}
+	html := fmt.Sprintf("%s", blackfriday.MarkdownCommon(input))
+	htmlByteArray := []byte(html)
+	return htmlByteArray
+}
+
+func startQuery() *goquery.Document {
+	buf := bytes.NewBuffer(readme())
+	query, err := goquery.NewDocumentFromReader(buf)
+	if err != nil {
+		panic(err)
+	}
+	return query
+}
diff --git a/test_stale_repositories.go b/test_stale_repositories.go
new file mode 100644
index 00000000000..8eace2a6429
--- /dev/null
+++ b/test_stale_repositories.go
@@ -0,0 +1,355 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"os"
+	"regexp"
+	"strings"
+	"text/template"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"golang.org/x/oauth2"
+)
+
+const issueTemplate = `
+{{range .}}
+- [ ] {{.}}
+{{end}}
+`
+
+var reGithubRepo = regexp.MustCompile("https://github.com/[a-zA-Z0-9-._]+/[a-zA-Z0-9-._]+$")
+var githubGETREPO = "https://api.github.com/repos%s"
+var githubGETCOMMITS = "https://api.github.com/repos%s/commits"
+var githubPOSTISSUES = "https://api.github.com/repos/avelino/awesome-go/issues"
+var awesomeGoGETISSUES = "https://api.github.com/repos/avelino/awesome-go/issues" //only returns open issues
+var numberOfYears time.Duration = 1
+
+const issueTitle = "Investigate repositories with more than 1 year without update"
+const deadLinkMessage = " this repository might no longer exist! (status code >= 400 returned)"
+const movedPermanently = " status code 301 received"
+const status302 = " status code 302 received"
+const archived = " repository has been archived"
+
+var delay time.Duration = 1
+
+//LIMIT specifies the max number of repositories that are added in a single run of the script
+var LIMIT = 10
+var ctr = 0
+
+type tokenSource struct {
+	AccessToken string
+}
+type issue struct {
+	Title string `json:"title"`
+	Body  string `json:"body"`
+}
+type repo struct {
+	Archived bool `json:"archived"`
+}
+
+// Token implements oauth2.TokenSource by returning the static access token.
+func (t *tokenSource) Token() (*oauth2.Token, error) {
+	token := &oauth2.Token{
+		AccessToken: t.AccessToken,
+	}
+	return token, nil
+}
+
+// getRepositoriesFromBody extracts the repository links listed in the body of
+// an issue created by this script. Every entry is expected on its own line as
+// "- [ ] <link><optional status message>".
+func getRepositoriesFromBody(body string) []string {
+	var links []string
+	for _, line := range strings.Split(body, "\n") {
+		link := strings.TrimSpace(line)
+		link = strings.TrimPrefix(link, "- [ ]")
+		link = strings.TrimPrefix(link, "- [x]")
+		// The status messages contain spaces, so strip them before trimming
+		// the whitespace that is left around the link.
+		for _, msg := range []string{deadLinkMessage, movedPermanently, status302, archived} {
+			link = strings.TrimSuffix(link, msg)
+		}
+		if link = strings.TrimSpace(link); link != "" {
+			links = append(links, link)
+		}
+	}
+	return links
+}
+
+// generateIssueBody renders repositories as a markdown task list.
+func generateIssueBody(repositories []string) (string, error) {
+	var writer bytes.Buffer
+	t := template.New("issue")
+	temp, err := t.Parse(issueTemplate)
+	if err != nil {
+		log.Print("Failed to generate template")
+		return "", err
+	}
+	err = temp.Execute(&writer, repositories)
+	if err != nil {
+		log.Print("Failed to generate template")
+		return "", err
+	}
+	issueBody := writer.String()
+	return issueBody, nil
+}
+
+// createIssue opens one issue that lists every entry of staleRepos. Nothing is
+// sent when the list is empty.
+func createIssue(staleRepos []string, client *http.Client) {
+	if len(staleRepos) == 0 {
+		log.Print("NO STALE REPOSITORIES")
+		return
+	}
+	body, err := generateIssueBody(staleRepos)
+	if err != nil {
+		log.Print("Failed at CreateIssue")
+		return
+	}
+	newIssue := &issue{
+		Title: issueTitle,
+		Body:  body,
+	}
+	buf := new(bytes.Buffer)
+	if err := json.NewEncoder(buf).Encode(newIssue); err != nil {
+		log.Printf("Failed at CreateIssue: %v", err)
+		return
+	}
+	req, err := http.NewRequest("POST", githubPOSTISSUES, buf)
+	if err != nil {
+		log.Print("Failed at CreateIssue")
+		return
+	}
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Printf("Failed at CreateIssue: %v", err)
+		return
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusCreated {
+		log.Printf("Failed at CreateIssue: unexpected status %s", resp.Status)
+	}
+}
+
+// getAllFlaggedRepositories adds to flaggedRepositories every repository that
+// is already listed in an open issue created by this script.
+func getAllFlaggedRepositories(client *http.Client, flaggedRepositories *map[string]bool) error {
+	req, err := http.NewRequest("GET", awesomeGoGETISSUES, nil)
+	if err != nil {
+		log.Print("Failed to get all issues")
+		return err
+	}
+	res, err := client.Do(req)
+	if err != nil {
+		log.Print("Failed to get all issues")
+		return err
+	}
+	defer res.Body.Close()
+	// An error response carries a JSON object instead of a list; it must not
+	// be mistaken for "no open issues".
+	if res.StatusCode != http.StatusOK {
+		log.Print("Failed to get all issues")
+		return fmt.Errorf("listing issues: unexpected status %s", res.Status)
+	}
+	target := []issue{}
+	if err := json.NewDecoder(res.Body).Decode(&target); err != nil {
+		log.Print("Failed to get all issues")
+		return err
+	}
+	for _, i := range target {
+		if i.Title == issueTitle {
+			repos := getRepositoriesFromBody(i.Body)
+			for _, repo := range repos {
+				(*flaggedRepositories)[repo] = true
+			}
+		}
+	}
+	return nil
+}
+
+// containsOpenIssue reports whether link is already listed in an open issue.
+func containsOpenIssue(link string, openIssues map[string]bool) bool {
+	_, ok := openIssues[link]
+	return ok
+}
+
+// testRepoState asks the GitHub API about the repository behind href and
+// appends it to staleRepos when it has moved, cannot be found or is archived.
+// It reports whether the repository was added; nothing is done when toRun is
+// false.
+func testRepoState(toRun bool, href string, client *http.Client, staleRepos *[]string) bool {
+	if !toRun {
+		return false
+	}
+	ownerRepo := strings.ReplaceAll(href, "https://github.com", "")
+	apiCall := fmt.Sprintf(githubGETREPO, ownerRepo)
+	req, err := http.NewRequest("GET", apiCall, nil)
+	if err != nil {
+		log.Printf("Failed at repository %s\n", href)
+		return false
+	}
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Printf("Failed at repository %s\n", href)
+		return false
+	}
+	defer resp.Body.Close()
+	switch {
+	case resp.StatusCode == http.StatusMovedPermanently:
+		*staleRepos = append(*staleRepos, href+movedPermanently)
+		log.Printf("%s returned 301", href)
+		return true
+	case resp.StatusCode == http.StatusFound:
+		*staleRepos = append(*staleRepos, href+status302)
+		log.Printf("%s returned 302", href)
+		return true
+	case resp.StatusCode == http.StatusUnauthorized,
+		resp.StatusCode == http.StatusForbidden,
+		resp.StatusCode == http.StatusTooManyRequests,
+		resp.StatusCode >= http.StatusInternalServerError:
+		// Bad credentials, rate limiting and server errors say nothing about
+		// the repository itself, so the repository is not flagged.
+		log.Printf("Failed at repository %s: unexpected status %s\n", href, resp.Status)
+		return false
+	case resp.StatusCode >= 400:
+		*staleRepos = append(*staleRepos, href+deadLinkMessage)
+		log.Printf("%s might not exist!", href)
+		return true
+	}
+	var repoResp repo
+	if err := json.NewDecoder(resp.Body).Decode(&repoResp); err != nil {
+		log.Printf("Failed at repository %s: %v\n", href, err)
+		return false
+	}
+	if repoResp.Archived {
+		*staleRepos = append(*staleRepos, href+archived)
+		log.Printf("%s is archived!", href)
+		return true
+	}
+	return false
+}
+
+// testCommitAge appends href to staleRepos when the repository has had no
+// commit during the last numberOfYears years. It reports whether the
+// repository was added; nothing is done when toRun is false.
+func testCommitAge(toRun bool, href string, client *http.Client, staleRepos *[]string) bool {
+	if !toRun {
+		return false
+	}
+	since := time.Now().Add(-1 * 365 * 24 * numberOfYears * time.Hour)
+	ownerRepo := strings.ReplaceAll(href, "https://github.com", "")
+	apiCall := fmt.Sprintf(githubGETCOMMITS, ownerRepo)
+	req, err := http.NewRequest("GET", apiCall, nil)
+	if err != nil {
+		log.Printf("Failed at repository %s\n", href)
+		return false
+	}
+	q := req.URL.Query()
+	q.Add("since", since.Format(time.RFC3339))
+	req.URL.RawQuery = q.Encode()
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Printf("Failed at repository %s\n", href)
+		return false
+	}
+	defer resp.Body.Close()
+	// Only a successful answer holds a commit list. Treating an error
+	// response as an empty list would flag the repository by mistake.
+	if resp.StatusCode != http.StatusOK {
+		log.Printf("Failed at repository %s: unexpected status %s\n", href, resp.Status)
+		return false
+	}
+	var respObj []map[string]interface{}
+	if err := json.NewDecoder(resp.Body).Decode(&respObj); err != nil {
+		log.Printf("Failed at repository %s: %v\n", href, err)
+		return false
+	}
+	if len(respObj) > 0 {
+		return false
+	}
+	log.Printf("%s has not had a commit in a while", href)
+	*staleRepos = append(*staleRepos, href)
+	return true
+}
+
+// testStaleRepository checks the GitHub repositories linked from the README
+// and opens one issue listing those that look dead, archived or unmaintained.
+func testStaleRepository() {
+	query := startQuery()
+	var staleRepos []string
+	addressedRepositories := make(map[string]bool)
+	// The workflow exports OAUTH_TOKEN; GITHUB_OAUTH_TOKEN takes precedence
+	// when both are set.
+	oauth := os.Getenv("GITHUB_OAUTH_TOKEN")
+	if oauth == "" {
+		oauth = os.Getenv("OAUTH_TOKEN")
+	}
+	client := &http.Client{}
+	if oauth == "" {
+		log.Print("No oauth token found. Using unauthenticated client ...")
+	} else {
+		tokenSource := &tokenSource{
+			AccessToken: oauth,
+		}
+		client = oauth2.NewClient(context.Background(), tokenSource)
+	}
+	client.Timeout = 30 * time.Second
+	// Report redirects instead of following them, otherwise testRepoState
+	// never sees a 301 or 302 answer.
+	client.CheckRedirect = func(*http.Request, []*http.Request) error {
+		return http.ErrUseLastResponse
+	}
+	err := getAllFlaggedRepositories(client, &addressedRepositories)
+	if err != nil {
+		log.Println("Failed to get existing issues. Exiting...")
+		return
+	}
+	query.Find("body li > a:first-child").EachWithBreak(func(_ int, s *goquery.Selection) bool {
+		href, ok := s.Attr("href")
+		if !ok {
+			log.Println("expected to have href")
+			return true
+		}
+		if ctr >= LIMIT && LIMIT != -1 {
+			log.Print("Max number of issues created")
+			return false
+		}
+		if containsOpenIssue(href, addressedRepositories) {
+			log.Printf("issue already exists for %s\n", href)
+			return true
+		}
+		if !reGithubRepo.MatchString(href) {
+			log.Printf("%s non-github repo not currently handled", href)
+			return true
+		}
+		isRepoAdded := testRepoState(true, href, client, &staleRepos)
+		if !isRepoAdded {
+			// The commit check only runs for repositories that were not
+			// flagged already; its result must not reset isRepoAdded.
+			isRepoAdded = testCommitAge(true, href, client, &staleRepos)
+		}
+		if isRepoAdded {
+			ctr++
+		}
+		return true
+	})
+	createIssue(staleRepos, client)
+}
+
+func main() {
+	f, err := os.OpenFile("test_stale_repositories_log", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
+	if err != nil {
+		log.Println("FAILED TO INIT LOG FILE")
+		return
+	}
+	defer f.Close()
+	log.SetOutput(f)
+	testStaleRepository()
+}