Skip to content
Open
11 changes: 1 addition & 10 deletions pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,16 +449,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e
// filteredRepoCache have an entry in the repoInfoCache.
for _, repo := range s.filteredRepoCache.Values() {
// Extract the repository name from the URL for filtering
repoName := repo
if strings.Contains(repo, "/") {
// Try to extract org/repo name from URL
if strings.Contains(repo, "github.com") {
parts := strings.Split(repo, "/")
if len(parts) >= 2 {
repoName = parts[len(parts)-2] + "/" + strings.TrimSuffix(parts[len(parts)-1], ".git")
}
}
}
repoName := extractRepoNameFromUrl(repo)

// Final filter check - only include repositories that pass the filter
if s.filteredRepoCache.wantRepo(repoName) {
Expand Down
73 changes: 73 additions & 0 deletions pkg/sources/github/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,38 @@ func TestNormalizeRepo(t *testing.T) {
assert.Contains(t, err.Error(), "no repositories found")
}

func TestNormalizeRepo_Enterprise(t *testing.T) {
tests := []struct {
name string
endpoint string
wantResult string
}{
{
name: "only host",
endpoint: "https://example.com",
wantResult: "https://example.com/org/repo.git",
},
{
name: "host with path",
endpoint: "https://example.com/api/v3",
wantResult: "https://example.com/org/repo.git",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
source := Source{
conn: &sourcespb.GitHub{
Endpoint: tt.endpoint,
},
}

result, err := source.normalizeRepo("org/repo")
assert.NoError(t, err)
assert.Equal(t, tt.wantResult, result)
})
}
}

func TestHandleRateLimit(t *testing.T) {
s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}})
ctx := context.Background()
Expand Down Expand Up @@ -1237,3 +1269,44 @@ func TestFixBothUnitErrAndUnitOKCalled(t *testing.T) {
assert.True(t, errCalled)
assert.False(t, okCalled)
}

func TestExtractRepoNameFromURL(t *testing.T) {
tests := []struct {
name string
url string
expected string
}{
{
name: "git URL",
url: "https://github.com/org/repo.git",
expected: "org/repo",
},
{
name: "git URL with trailing slash",
url: "https://github.com/org/repo.git/",
expected: "org/repo",
},
{
name: "git URL without .git",
url: "https://github.com/org/repo",
expected: "org/repo",
},
{
name: "git enterprise URL",
url: "https://example-enterprise.com/org/repo.git",
expected: "org/repo",
},
{
name: "just org/repo",
url: "org/repo",
expected: "org/repo",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := extractRepoNameFromUrl(tt.url)
assert.Equal(t, tt.expected, result)
})
}
}
23 changes: 23 additions & 0 deletions pkg/sources/github/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"sync"
Expand Down Expand Up @@ -365,8 +366,30 @@ func (s *Source) normalizeRepo(repo string) (string, error) {
// If it's a repository name (contains / but not http), convert to full URL first
if strings.Contains(repo, "/") && !regexp.MustCompile(`^[a-z]+://`).MatchString(repo) {
fullURL := "https://github.com/" + repo
// If using GitHub Enterprise, adjust the URL accordingly
if s.conn != nil && s.conn.Endpoint != "" {
u, err := url.Parse(s.conn.Endpoint)
if err != nil {
return "", fmt.Errorf("invalid enterprise endpoint: %w", err)
}
// we want to remove any path components from the endpoint and just use the host
u.Path = "/" + repo
fullURL = u.String()
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enterprise check uses wrong condition for standard GitHub

High Severity

The normalizeRepo function uses s.conn.Endpoint != "" to detect GitHub Enterprise, but this condition is also true when users explicitly set the endpoint to "https://api.github.com" (standard GitHub API). In this case, the function incorrectly generates repository URLs using api.github.com as the host (e.g., https://api.github.com/org/repo) instead of the correct github.com (e.g., https://github.com/org/repo). The connector code correctly handles this by checking if the endpoint equals cloudV3Endpoint or matches endsWithGithub, but normalizeRepo does not use the same logic.

Fix in Cursor Fix in Web

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this related to this?

return giturl.NormalizeGithubRepo(fullURL)
}

return "", fmt.Errorf("no repositories found for %s", repo)
}

// extractRepoNameFromUrl extracts the "owner/repo" name from a GitHub repository URL.
// Example: http://github.com/owner/repo.git -> owner/repo
// If an invalid URL is provided, it returns the original string.
func extractRepoNameFromUrl(repoURL string) string {
u, err := url.Parse(repoURL)
if err != nil {
return repoURL
}
cleanedPath := strings.Trim(u.Path, "/")
return strings.TrimSuffix(cleanedPath, ".git")
}
Loading