-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[INS-285] Fix custom detectors line number reporting to match the full regex instead of capture group #4697
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
c03a57c
eb821ce
af44c76
8c63a41
2a7cd5f
6420f1c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,6 +66,9 @@ func NewWebhookCustomRegex(pb *custom_detectorspb.CustomRegex) (*CustomRegexWebh | |
| } | ||
| } | ||
|
|
||
| // Ensure primary regex name is set. | ||
| ensurePrimaryRegexNameSet(pb) | ||
|
|
||
| // TODO: Copy only necessary data out of pb. | ||
| return &CustomRegexWebhook{pb}, nil | ||
| } | ||
|
|
@@ -229,14 +232,16 @@ func (c *CustomRegexWebhook) createResults(ctx context.Context, match map[string | |
| values := match[key] | ||
| // values[0] contains the entire regex match. | ||
| secret := values[0] | ||
| fullMatch := values[0] | ||
| if len(values) > 1 { | ||
| secret = values[1] | ||
| } | ||
| raw += secret | ||
|
|
||
| // if the match is of the primary regex, set it's value as primary secret value in result | ||
| // if the match is of the primary regex, set it's full match as primary secret value in result | ||
|
||
| if c.PrimaryRegexName == key { | ||
| result.SetPrimarySecretValue(secret) | ||
| // we're using the full match here to ensure correct line number reporting | ||
| result.SetPrimarySecretValue(fullMatch) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a detailed comment here why are we using fullMatch instead of secret here. |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -394,3 +399,14 @@ func (c *CustomRegexWebhook) Description() string { | |
| } | ||
| return c.GetDescription() | ||
| } | ||
|
|
||
| // ensurePrimaryRegexNameSet sets the PrimaryRegexName field to the first | ||
| // regex name if it is not already set. | ||
| func ensurePrimaryRegexNameSet(pb *custom_detectorspb.CustomRegex) { | ||
| if pb.PrimaryRegexName == "" { | ||
| for name := range pb.Regex { | ||
| pb.PrimaryRegexName = name | ||
| return | ||
| } | ||
| } | ||
cursor[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -232,6 +232,61 @@ func TestDetectorPrimarySecret(t *testing.T) { | |
| assert.Equal(t, "secret_YI7C90ACY1_yy", results[0].GetPrimarySecretValue()) | ||
| } | ||
|
|
||
| func TestDetectorPrimarySecretFullMatch(t *testing.T) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Write a test case for a match which span multiple lines.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to write a test case in the engine too.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test case should include the full regex match appearing across multiple lines, while the actual secret exists on only one line. The primary secret value will span multiple lines and should resolve to the line number where the match starts. I want to confirm how the primary secret value is processed when it contains multi-line values - specifically, how the engine determines and reports the starting line.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
| tests := []struct { | ||
| name string | ||
| input *custom_detectorspb.CustomRegex | ||
| chunk []byte | ||
| want string | ||
| }{ | ||
| { | ||
| name: "primary regex full match", | ||
| input: &custom_detectorspb.CustomRegex{ | ||
| Name: "test", | ||
| Keywords: []string{"secret"}, | ||
| Regex: map[string]string{"secret": `secret *= *"([^"\r\n]+)"`}, | ||
| PrimaryRegexName: "secret", | ||
| }, | ||
| chunk: []byte(` | ||
| // some code | ||
| secret="mysecret" | ||
| // some code | ||
| `), | ||
| want: `secret="mysecret"`, | ||
| }, | ||
| // Write a test case for a match which span multiple lines. | ||
| { | ||
| name: "primary regex full match multiline", | ||
| input: &custom_detectorspb.CustomRegex{ | ||
| Name: "test", | ||
| Keywords: []string{"secret"}, | ||
| Regex: map[string]string{"secret": `secret *= *"([^"]+)"`}, | ||
| PrimaryRegexName: "secret", | ||
| }, | ||
| chunk: []byte(` | ||
| // some code | ||
| secret="mysecret | ||
| thatspansmultiplelines" | ||
| // some code | ||
| `), | ||
| want: `secret="mysecret | ||
| thatspansmultiplelines"`, | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| detector, err := NewWebhookCustomRegex(tt.input) | ||
| assert.NoError(t, err) | ||
| results, err := detector.FromData(context.Background(), false, tt.chunk) | ||
| assert.NoError(t, err) | ||
| assert.Equal(t, 1, len(results)) | ||
| assert.Equal(t, tt.want, results[0].GetPrimarySecretValue()) | ||
| }) | ||
| } | ||
|
|
||
| } | ||
|
|
||
| func TestDetectorValidations(t *testing.T) { | ||
| type args struct { | ||
| CustomRegex *custom_detectorspb.CustomRegex | ||
|
|
@@ -707,6 +762,24 @@ func TestNewWebhookCustomRegex_Validation(t *testing.T) { | |
| } | ||
| } | ||
|
|
||
| func TestNewWebhookCustomRegex_EnsurePrimaryRegexNameSet(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| pb := &custom_detectorspb.CustomRegex{ | ||
| Name: "test", | ||
| Keywords: []string{"kw"}, | ||
| Regex: map[string]string{ | ||
| "first": `first_regex`, | ||
| "second": `second_regex`, | ||
| }, | ||
| // PrimaryRegexName is not set. | ||
| } | ||
|
|
||
| detector, err := NewWebhookCustomRegex(pb) | ||
| assert.NoError(t, err) | ||
| assert.Equal(t, "first", detector.GetPrimaryRegexName(), "expected PrimaryRegexName to be set to the first regex name") | ||
| } | ||
|
|
||
| func BenchmarkProductIndices(b *testing.B) { | ||
| for i := 0; i < b.N; i++ { | ||
| _ = productIndices(3, 2, 6) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we now always set the primary regex ourselves, is it still necessary to expose this as a user-configurable option and document it?
We should properly document this behavior in code, clearly explaining how and why the primary regex is always used for full matches.
We should explain this in the README as well: going forward, the custom detector line number will point to the full match of the primary regex or, if none is explicitly set, the first regex. This is important because a match may span multiple lines.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The user may want to set a regex other than the first one as the primary regex, so I think this option should still be exposed.
Totally agree with the documentation part! I'll do that