fix(detector/github): Enhance the dependency graph API call for big repositories (#1681)
* fix: Reduce the amount of data fetched per page when retrying after a timeout failure on the Dependency Graph API * check the rate limit on the Dependency Graph API * comment
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff"
|
||||
@@ -218,65 +219,85 @@ func DetectGitHubDependencyGraph(r *models.ScanResult, owner, repo, token string
|
||||
//TODO Proxy
|
||||
httpClient := oauth2.NewClient(context.Background(), src)
|
||||
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, "", "")
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, "", "", 10, 100)
|
||||
}
|
||||
|
||||
// recursive function
|
||||
func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner, repo, after, dependenciesAfter string) (err error) {
|
||||
func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner, repo, after, dependenciesAfter string, first, dependenciesFirst int) (err error) {
|
||||
const queryFmt = `{"query":
|
||||
"query { repository(owner:\"%s\", name:\"%s\") { url dependencyGraphManifests(first: %d, withDependencies: true%s) { pageInfo { endCursor hasNextPage } edges { node { blobPath filename repository { url } parseable exceedsMaxSize dependenciesCount dependencies(first: %d%s) { pageInfo { endCursor hasNextPage } edges { node { packageName packageManager repository { url } requirements hasDependencies } } } } } } } }"}`
|
||||
|
||||
queryStr := fmt.Sprintf(queryFmt, owner, repo, 10, after, 100, dependenciesAfter)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
"https://api.github.com/graphql",
|
||||
bytes.NewBuffer([]byte(queryStr)),
|
||||
)
|
||||
defer cancel()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// https://docs.github.com/en/graphql/overview/schema-previews#access-to-a-repository-s-dependency-graph-preview
|
||||
// TODO remove this header if it is no longer preview status in the future.
|
||||
req.Header.Set("Accept", "application/vnd.github.hawkgirl-preview+json")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
var graph DependencyGraph
|
||||
rateLimitRemaining := 5000
|
||||
count, retryMax := 0, 10
|
||||
countCheck := func(err error) error {
|
||||
retryCheck := func(err error) error {
|
||||
if count == retryMax {
|
||||
return backoff.Permanent(err)
|
||||
}
|
||||
if rateLimitRemaining == 0 {
|
||||
// The GraphQL API rate limit is 5,000 points per hour.
|
||||
// Terminate with an error on rate limit reached.
|
||||
return backoff.Permanent(errof.New(errof.ErrFailedToAccessGithubAPI,
|
||||
fmt.Sprintf("rate limit exceeded. error: %s", err.Error())))
|
||||
}
|
||||
return err
|
||||
}
|
||||
operation := func() error {
|
||||
count++
|
||||
queryStr := fmt.Sprintf(queryFmt, owner, repo, first, after, dependenciesFirst, dependenciesAfter)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
"https://api.github.com/graphql",
|
||||
bytes.NewBuffer([]byte(queryStr)),
|
||||
)
|
||||
if err != nil {
|
||||
return retryCheck(err)
|
||||
}
|
||||
|
||||
// https://docs.github.com/en/graphql/overview/schema-previews#access-to-a-repository-s-dependency-graph-preview
|
||||
// TODO remove this header if it is no longer preview status in the future.
|
||||
req.Header.Set("Accept", "application/vnd.github.hawkgirl-preview+json")
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return countCheck(err)
|
||||
return retryCheck(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// https://docs.github.com/en/graphql/overview/resource-limitations#rate-limit
|
||||
if rateLimitRemaining, err = strconv.Atoi(resp.Header.Get("X-RateLimit-Remaining")); err != nil {
|
||||
// If the header retrieval fails, rateLimitRemaining will be set to 0,
|
||||
// preventing further retries. To enable retry, we reset it to 5000.
|
||||
rateLimitRemaining = 5000
|
||||
return retryCheck(errof.New(errof.ErrFailedToAccessGithubAPI, "Failed to get X-RateLimit-Remaining header"))
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return countCheck(err)
|
||||
return retryCheck(err)
|
||||
}
|
||||
|
||||
graph = DependencyGraph{}
|
||||
if err := json.Unmarshal(body, &graph); err != nil {
|
||||
return countCheck(err)
|
||||
return retryCheck(err)
|
||||
}
|
||||
|
||||
if len(graph.Errors) > 0 || graph.Data.Repository.URL == "" {
|
||||
return countCheck(errof.New(errof.ErrFailedToAccessGithubAPI,
|
||||
// this mainly occurs on timeout
|
||||
// reduce the number of dependencies to be fetched for the next retry
|
||||
if dependenciesFirst > 50 {
|
||||
dependenciesFirst -= 5
|
||||
}
|
||||
return retryCheck(errof.New(errof.ErrFailedToAccessGithubAPI,
|
||||
fmt.Sprintf("Failed to access to GitHub API. Repository: %s/%s; Response: %s", owner, repo, string(body))))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
notify := func(err error, t time.Duration) {
|
||||
logging.Log.Warnf("Failed trial (count: %d). retrying in %s. err: %+v", count, t, err)
|
||||
logging.Log.Warnf("Failed attempts (count: %d). retrying in %s. err: %+v", count, t, err)
|
||||
}
|
||||
|
||||
if err = backoff.RetryNotify(operation, backoff.NewExponentialBackOff(), notify); err != nil {
|
||||
@@ -309,12 +330,12 @@ func fetchDependencyGraph(r *models.ScanResult, httpClient *http.Client, owner,
|
||||
}
|
||||
}
|
||||
if dependenciesAfter != "" {
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter)
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter, first, dependenciesFirst)
|
||||
}
|
||||
|
||||
if graph.Data.Repository.DependencyGraphManifests.PageInfo.HasNextPage {
|
||||
after = fmt.Sprintf(`, after: \"%s\"`, graph.Data.Repository.DependencyGraphManifests.PageInfo.EndCursor)
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter)
|
||||
return fetchDependencyGraph(r, httpClient, owner, repo, after, dependenciesAfter, first, dependenciesFirst)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user