From 6271ec522ed941af996f5d19d3c92f0dcc7685d0 Mon Sep 17 00:00:00 2001 From: Sinclair Date: Fri, 26 May 2023 14:39:02 +0900 Subject: [PATCH] fix(detector/github): Enhance the dependency graph API call on the big repository (#1681) * fix: Reduce the number of data to be fetched per page, when retrying after a timeout failure on Dependency Graph API * check rate limit on dependency graph API * comment --- detector/github.go | 69 ++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/detector/github.go b/detector/github.go index 90f0d340..1c6d2e9e 100644 --- a/detector/github.go +++ b/detector/github.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "net/http" + "strconv" "time" "github.com/cenkalti/backoff" @@ -218,65 +219,85 @@ func DetectGitHubDependencyGraph(r *models.ScanResult, owner, repo, token string //TODO Proxy httpClient := oauth2.NewClient(context.Background(), src) - return fetchDependencyGraph(r, httpClient, owner, repo, "", "") + return fetchDependencyGraph(r, httpClient, owner, repo, "", "", 10, 100) } // recursive function -func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner, repo, after, dependenciesAfter string) (err error) { +func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner, repo, after, dependenciesAfter string, first, dependenciesFirst int) (err error) { const queryFmt = `{"query": "query { repository(owner:\"%s\", name:\"%s\") { url dependencyGraphManifests(first: %d, withDependencies: true%s) { pageInfo { endCursor hasNextPage } edges { node { blobPath filename repository { url } parseable exceedsMaxSize dependenciesCount dependencies(first: %d%s) { pageInfo { endCursor hasNextPage } edges { node { packageName packageManager repository { url } requirements hasDependencies } } } } } } } }"}` - - queryStr := fmt.Sprintf(queryFmt, owner, repo, 10, after, 100, dependenciesAfter) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - req, err := http.NewRequestWithContext(ctx, http.MethodPost, - "https://api.github.com/graphql", - bytes.NewBuffer([]byte(queryStr)), - ) defer cancel() - if err != nil { - return err - } - - // https://docs.github.com/en/graphql/overview/schema-previews#access-to-a-repository-s-dependency-graph-preview - // TODO remove this header if it is no longer preview status in the future. - req.Header.Set("Accept", "application/vnd.github.hawkgirl-preview+json") - req.Header.Set("Content-Type", "application/json") var graph DependencyGraph + rateLimitRemaining := 5000 count, retryMax := 0, 10 - countCheck := func(err error) error { + retryCheck := func(err error) error { if count == retryMax { return backoff.Permanent(err) } + if rateLimitRemaining == 0 { + // The GraphQL API rate limit is 5,000 points per hour. + // Terminate with an error on rate limit reached. + return backoff.Permanent(errof.New(errof.ErrFailedToAccessGithubAPI, + fmt.Sprintf("rate limit exceeded. error: %s", err.Error()))) + } return err } operation := func() error { count++ + queryStr := fmt.Sprintf(queryFmt, owner, repo, first, after, dependenciesFirst, dependenciesAfter) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, + "https://api.github.com/graphql", + bytes.NewBuffer([]byte(queryStr)), + ) + if err != nil { + return retryCheck(err) + } + + // https://docs.github.com/en/graphql/overview/schema-previews#access-to-a-repository-s-dependency-graph-preview + // TODO remove this header if it is no longer preview status in the future. + req.Header.Set("Accept", "application/vnd.github.hawkgirl-preview+json") + req.Header.Set("Content-Type", "application/json") + resp, err := httpClient.Do(req) if err != nil { - return countCheck(err) + return retryCheck(err) } defer resp.Body.Close() + // https://docs.github.com/en/graphql/overview/resource-limitations#rate-limit + if rateLimitRemaining, err = strconv.Atoi(resp.Header.Get("X-RateLimit-Remaining")); err != nil { + // If the header retrieval fails, rateLimitRemaining will be set to 0, + // preventing further retries. To enable retry, we reset it to 5000. + rateLimitRemaining = 5000 + return retryCheck(errof.New(errof.ErrFailedToAccessGithubAPI, "Failed to get X-RateLimit-Remaining header")) + } + body, err := io.ReadAll(resp.Body) if err != nil { - return countCheck(err) + return retryCheck(err) } graph = DependencyGraph{} if err := json.Unmarshal(body, &graph); err != nil { - return countCheck(err) + return retryCheck(err) } if len(graph.Errors) > 0 || graph.Data.Repository.URL == "" { - return countCheck(errof.New(errof.ErrFailedToAccessGithubAPI, + // this mainly occurs on timeout + // reduce the number of dependencies to be fetched for the next retry + if dependenciesFirst > 50 { + dependenciesFirst -= 5 + } + return retryCheck(errof.New(errof.ErrFailedToAccessGithubAPI, fmt.Sprintf("Failed to access to GitHub API. Repository: %s/%s; Response: %s", owner, repo, string(body)))) } return nil } notify := func(err error, t time.Duration) { - logging.Log.Warnf("Failed trial (count: %d). retrying in %s. err: %+v", count, t, err) + logging.Log.Warnf("Failed attempts (count: %d). retrying in %s. err: %+v", count, t, err) } if err = backoff.RetryNotify(operation, backoff.NewExponentialBackOff(), notify); err != nil { @@ -309,12 +330,12 @@ func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner, } } if dependenciesAfter != "" { - return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter) + return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter, first, dependenciesFirst) } if graph.Data.Repository.DependencyGraphManifests.PageInfo.HasNextPage { after = fmt.Sprintf(`, after: \"%s\"`, graph.Data.Repository.DependencyGraphManifests.PageInfo.EndCursor) - return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter) + return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter, first, dependenciesFirst) } return nil