diff --git a/pkg/decoders/escaped_unicode.go b/pkg/decoders/escaped_unicode.go index f3f6462b357b..b8574bc104a9 100644 --- a/pkg/decoders/escaped_unicode.go +++ b/pkg/decoders/escaped_unicode.go @@ -111,7 +111,7 @@ func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk { SecretID: chunk.SecretID, SourceMetadata: chunk.SourceMetadata, SourceType: chunk.SourceType, - Verify: chunk.Verify, + SourceVerify: chunk.SourceVerify, }, } } else { diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index 1802e89ae13e..effa4c75abab 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -774,6 +774,7 @@ type detectableChunk struct { chunk sources.Chunk decoder detectorspb.DecoderType wgDoneFn func() + verify bool } // verificationOverlapChunk is a decoded chunk that has multiple detectors that match it. @@ -792,7 +793,7 @@ func (e *Engine) scannerWorker(ctx context.Context) { for chunk := range e.ChunksChan() { startTime := time.Now() - sourceVerify := chunk.Verify + sourceVerify := chunk.SourceVerify for _, decoder := range e.decoders { decodeStart := time.Now() // This copy is needed to preserve the original chunk.Data across multiple decoders. @@ -819,12 +820,12 @@ func (e *Engine) scannerWorker(ctx context.Context) { } for _, detector := range matchingDetectors { - decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides) wgDetect.Add(1) e.detectableChunksChan <- detectableChunk{ chunk: *decoded.Chunk, detector: detector, decoder: decoded.DecoderType, + verify: e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides), wgDoneFn: wgDetect.Done, } } @@ -1004,6 +1005,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) { chunk: chunk.chunk, detector: detector, decoder: chunk.decoder, + verify: false, wgDoneFn: wgDetect.Done, }, res, @@ -1022,11 +1024,11 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) { for _, detector := range detectorKeysWithResults { wgDetect.Add(1) - chunk.chunk.Verify = e.shouldVerifyChunk(chunk.chunk.Verify, detector, e.detectorVerificationOverrides) e.detectableChunksChan <- detectableChunk{ chunk: chunk.chunk, detector: detector, decoder: chunk.decoder, + verify: e.shouldVerifyChunk(chunk.chunk.SourceVerify, detector, e.detectorVerificationOverrides), wgDoneFn: wgDetect.Done, } } @@ -1089,7 +1091,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) { results, err := e.verificationCache.FromData( ctx, data.detector.Detector, - data.chunk.Verify, + data.verify, data.chunk.SecretID != 0, matchBytes) t.Stop() diff --git a/pkg/engine/engine_test.go b/pkg/engine/engine_test.go index 18800715f689..c0f4dd8a5158 100644 --- a/pkg/engine/engine_test.go +++ b/pkg/engine/engine_test.go @@ -1355,76 +1355,114 @@ func (p passthroughDecoder) FromChunk(chunk *sources.Chunk) *decoders.DecodableC func (p passthroughDecoder) Type() detectorspb.DecoderType { return detectorspb.DecoderType(-1) } +// TestEngine_DetectChunk_UsesVerifyFlag validates that detectChunk correctly forwards detectableChunk.verify to +// detectors. func TestEngine_DetectChunk_UsesVerifyFlag(t *testing.T) { ctx := context.Background() - // Arrange: Create a minimal engine. - e := &Engine{ - results: make(chan detectors.ResultWithMetadata, 1), - verificationCache: verificationcache.New(nil, &verificationcache.InMemoryMetrics{}), + testCases := []struct { + name string + verify bool + }{ + {name: "verify=true", verify: true}, + {name: "verify=false", verify: false}, } - // Arrange: Create a detector match. We can't create one directly, so we have to use a minimal A-H core. - ahcore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{passthroughDetector{keywords: []string{"keyword"}}}) - detectorMatches := ahcore.FindDetectorMatches([]byte("keyword")) - require.Len(t, detectorMatches, 1) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Arrange: Create a minimal engine. + e := &Engine{ + results: make(chan detectors.ResultWithMetadata, 1), + verificationCache: verificationcache.New(nil, &verificationcache.InMemoryMetrics{}), + } - // Arrange: Create a chunk to detect. - chunk := detectableChunk{ - chunk: sources.Chunk{ - Verify: true, - }, - detector: detectorMatches[0], - wgDoneFn: func() {}, - } + // Arrange: Create a detector match. We can't create one directly, so we have to use a minimal A-H core. + ahcore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{passthroughDetector{keywords: []string{"keyword"}}}) + detectorMatches := ahcore.FindDetectorMatches([]byte("keyword")) + require.Len(t, detectorMatches, 1) + + // Arrange: Create a chunk to detect. + chunk := detectableChunk{ + detector: detectorMatches[0], + verify: tc.verify, + wgDoneFn: func() {}, + } - // Act - e.detectChunk(ctx, chunk) - close(e.results) + // Act + e.detectChunk(ctx, chunk) + close(e.results) - // Assert: Confirm that a result was generated and that it has the expected verify flag. - select { - case result := <-e.results: - assert.True(t, result.Result.Verified) - default: - t.Errorf("expected a result but did not get one") + // Assert: Confirm that a result was generated and that it has the expected verify flag. + select { + case result := <-e.results: + assert.Equal(t, tc.verify, result.Result.Verified) + default: + t.Errorf("expected a result but did not get one") + } + }) } } +// TestEngine_ScannerWorker_DetectableChunkHasCorrectVerifyFlag validates that scannerWorker generates detectableChunk +// structs that have the correct verify flag set. It also validates that the original chunks' SourceVerify flags are +// unchanged. func TestEngine_ScannerWorker_DetectableChunkHasCorrectVerifyFlag(t *testing.T) { ctx := context.Background() - // Arrange: Create a minimal engine. - detector := &passthroughDetector{keywords: []string{"keyword"}} - e := &Engine{ - AhoCorasickCore: ahocorasick.NewAhoCorasickCore([]detectors.Detector{detector}), - decoders: []decoders.Decoder{passthroughDecoder{}}, - detectableChunksChan: make(chan detectableChunk, 1), - sourceManager: sources.NewManager(), - verify: true, + testCases := []struct { + name string + engineVerify bool + sourceVerify bool + wantVerify bool + }{ + {name: "engineVerify=false,sourceVerify=false", engineVerify: false, sourceVerify: false, wantVerify: false}, + {name: "engineVerify=false,sourceVerify=true", engineVerify: false, sourceVerify: true, wantVerify: false}, + {name: "engineVerify=true,sourceVerify=false", engineVerify: true, sourceVerify: false, wantVerify: false}, + {name: "engineVerify=true,sourceVerify=true", engineVerify: true, sourceVerify: true, wantVerify: true}, } - // Arrange: Create a chunk to scan. - chunk := sources.Chunk{ - Data: []byte("keyword"), - Verify: true, - } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Arrange: Create a minimal engine. + detector := &passthroughDetector{keywords: []string{"keyword"}} + e := &Engine{ + AhoCorasickCore: ahocorasick.NewAhoCorasickCore([]detectors.Detector{detector}), + decoders: []decoders.Decoder{passthroughDecoder{}}, + detectableChunksChan: make(chan detectableChunk, 1), + sourceManager: sources.NewManager(), + verify: tc.engineVerify, + } + + // Arrange: Create a chunk to scan. + chunk := sources.Chunk{ + Data: []byte("keyword"), + SourceVerify: tc.sourceVerify, + } - // Arrange: Enqueue a chunk to be scanned. - e.sourceManager.ScanChunk(&chunk) + // Arrange: Enqueue a chunk to be scanned. + e.sourceManager.ScanChunk(&chunk) - // Act - go e.scannerWorker(ctx) + // Act + go e.scannerWorker(ctx) - // Assert: Confirm that a chunk was generated and that it has the expected verify flag. - select { - case chunk := <-e.detectableChunksChan: - assert.True(t, chunk.chunk.Verify) - case <-time.After(1 * time.Second): - t.Errorf("expected a detectableChunk but did not get one") + // Assert: Confirm that a chunk was generated, that its SourceVerify flag is unchanged, and that its verify + // flag is correctly set. + select { + case chunk := <-e.detectableChunksChan: + assert.Equal(t, tc.sourceVerify, chunk.chunk.SourceVerify) + assert.Equal(t, tc.wantVerify, chunk.verify) + case <-time.After(1 * time.Second): + t.Errorf("expected a detectableChunk but did not get one") + } + }) } } +// TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag validates that the results directly +// generated by verificationOverlapWorker all came from detector invocations with the verify flag cleared (because these +// results were generated from verification overlaps). It also validates that detectableChunk structs generated by +// verificationOverlapWorker have their verify flags correctly set, and that these structs' original chunks' +// SourceVerify flags are unchanged. func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t *testing.T) { ctx := context.Background() @@ -1450,8 +1488,8 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t // Arrange: Create a chunk to "scan." chunk := sources.Chunk{ - Data: []byte("keyword ;oahpow8heg;blaisd"), - Verify: true, + Data: []byte("keyword ;oahpow8heg;blaisd"), + SourceVerify: true, } // Arrange: Create overlapping detector matches. We can't create them directly, so we have to use a minimal A-H @@ -1481,11 +1519,13 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t assert.False(t, result.Result.Verified) } - // Assert: Confirm that every generated detectable chunk carries the original Verify flag. + // Assert: Confirm that every generated detectable chunk's Chunk.SourceVerify flag is unchanged and that its + // verify flag is correctly set. // CMR: There should be not be any of these chunks. However, due to what I believe is an unrelated bug, there - // are. This test ensures that even in that erroneous case, their Verify flag is correct. + // are. This test ensures that even in that erroneous case, their contents are correct. for detectableChunk := range processedDetectableChunks { - assert.True(t, detectableChunk.chunk.Verify) + assert.True(t, detectableChunk.verify) + assert.True(t, detectableChunk.chunk.SourceVerify) } }) t.Run("no overlap", func(t *testing.T) { @@ -1509,8 +1549,8 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t // Arrange: Create a chunk to "scan." chunk := sources.Chunk{ - Data: []byte("keyword ;oahpow8heg;blaisd"), - Verify: true, + Data: []byte("keyword ;oahpow8heg;blaisd"), + SourceVerify: true, } // Arrange: Create non-overlapping detector matches. We can't create them directly, so we have to use a minimal @@ -1534,9 +1574,10 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t close(e.detectableChunksChan) close(processedDetectableChunks) - // Assert: Confirm that every generated detectable chunk carries the original Verify flag. + // Assert: Confirm that SourceVerify flags are unchanged, and verify flags are correctly set. for detectableChunk := range processedDetectableChunks { - assert.True(t, detectableChunk.chunk.Verify) + assert.True(t, detectableChunk.chunk.SourceVerify) + assert.True(t, detectableChunk.verify) } }) } diff --git a/pkg/sources/circleci/circleci.go b/pkg/sources/circleci/circleci.go index 667974b88bc8..5dee11109ee2 100644 --- a/pkg/sources/circleci/circleci.go +++ b/pkg/sources/circleci/circleci.go @@ -375,7 +375,7 @@ func (s *Source) chunk(ctx context.Context, proj project, buildNum BuildNum, ste }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } if err := data.Error(); err != nil { return err diff --git a/pkg/sources/docker/docker.go b/pkg/sources/docker/docker.go index 25a22f72d7c1..3f02232c9381 100644 --- a/pkg/sources/docker/docker.go +++ b/pkg/sources/docker/docker.go @@ -342,8 +342,8 @@ func (s *Source) processHistoryEntry(ctx context.Context, historyInfo historyEnt }, }, }, - Verify: s.verify, - Data: []byte(historyInfo.entry.CreatedBy), + SourceVerify: s.verify, + Data: []byte(historyInfo.entry.CreatedBy), } ctx.Logger().V(2).Info("scanning image history entry", "index", historyInfo.index, "layer", historyInfo.layerDigest) @@ -455,7 +455,7 @@ func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chu }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } chunk.Data = data.Bytes() diff --git a/pkg/sources/elasticsearch/elasticsearch.go b/pkg/sources/elasticsearch/elasticsearch.go index 2d8226e4c401..a7bdc95f8bc9 100644 --- a/pkg/sources/elasticsearch/elasticsearch.go +++ b/pkg/sources/elasticsearch/elasticsearch.go @@ -202,7 +202,7 @@ func (s *Source) Chunks( }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } chunk.Data = []byte(document.message) diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index 9d99ae67e7c7..fa76cafec13f 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -215,7 +215,7 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } return handlers.HandleFile(fileCtx, inputFile, chunkSkel, sources.ChanReporter{Ch: chunksChan}) diff --git a/pkg/sources/gcs/gcs.go b/pkg/sources/gcs/gcs.go index f23c4f9ee529..413dde690dae 100644 --- a/pkg/sources/gcs/gcs.go +++ b/pkg/sources/gcs/gcs.go @@ -333,11 +333,11 @@ func (s *Source) completeProgress(ctx context.Context) { func (s *Source) processObject(ctx context.Context, o object) error { chunkSkel := &sources.Chunk{ - SourceName: s.name, - SourceType: s.Type(), - JobID: s.JobID(), - SourceID: s.sourceId, - Verify: s.verify, + SourceName: s.name, + SourceType: s.Type(), + JobID: s.JobID(), + SourceID: s.sourceId, + SourceVerify: s.verify, SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Gcs{ Gcs: &source_metadatapb.GCS{ diff --git a/pkg/sources/gcs/gcs_integration_test.go b/pkg/sources/gcs/gcs_integration_test.go index 6c8fe2a09c46..3fa3a8865482 100644 --- a/pkg/sources/gcs/gcs_integration_test.go +++ b/pkg/sources/gcs/gcs_integration_test.go @@ -79,10 +79,10 @@ func TestChunks_PublicBucket(t *testing.T) { want := []*sources.Chunk{ { - SourceName: "test", - SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, - SourceID: 0, - Verify: true, + SourceName: "test", + SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, + SourceID: 0, + SourceVerify: true, SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Gcs{ Gcs: &source_metadatapb.GCS{ @@ -164,10 +164,10 @@ func createTestChunks() []*sources.Chunk { chunks := make([]*sources.Chunk, 0, len(objects)) for _, o := range objects { chunks = append(chunks, &sources.Chunk{ - SourceName: "test", - SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, - SourceID: 0, - Verify: true, + SourceName: "test", + SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, + SourceID: 0, + SourceVerify: true, SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Gcs{ Gcs: &source_metadatapb.GCS{ diff --git a/pkg/sources/gcs/gcs_test.go b/pkg/sources/gcs/gcs_test.go index 126facfe40f5..9cde72ffe444 100644 --- a/pkg/sources/gcs/gcs_test.go +++ b/pkg/sources/gcs/gcs_test.go @@ -271,11 +271,11 @@ func createTestObject(id int) object { func createTestSourceChunk(id int) *sources.Chunk { return &sources.Chunk{ - SourceName: "test", - SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, - SourceID: 0, - Verify: true, - Data: []byte(fmt.Sprintf("hello world %d", id)), + SourceName: "test", + SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS, + SourceID: 0, + SourceVerify: true, + Data: []byte(fmt.Sprintf("hello world %d", id)), SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Gcs{ Gcs: &source_metadatapb.GCS{ diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index 52f925b7520c..85a0445f0b5c 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -745,7 +745,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string SourceType: s.sourceType, SourceMetadata: metadata, Data: []byte(sb.String()), - Verify: s.verify, + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { return err @@ -779,7 +779,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string JobID: s.jobID, SourceType: s.sourceType, SourceMetadata: metadata, - Verify: s.verify, + SourceVerify: s.verify, } if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil { @@ -828,7 +828,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string SourceType: s.sourceType, SourceMetadata: metadata, Data: data, - Verify: s.verify, + SourceVerify: s.verify, } return reporter.ChunkOk(ctx, chunk) } @@ -866,7 +866,7 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email SourceType: s.sourceType, SourceMetadata: metadata, Data: append([]byte{}, newChunkBuffer.Bytes()...), - Verify: s.verify, + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { // TODO: Return error. @@ -886,7 +886,7 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email SourceType: s.sourceType, SourceMetadata: metadata, Data: line, - Verify: s.verify, + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { // TODO: Return error. @@ -910,7 +910,7 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email SourceType: s.sourceType, SourceMetadata: metadata, Data: append([]byte{}, newChunkBuffer.Bytes()...), - Verify: s.verify, + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { // TODO: Return error. @@ -1010,7 +1010,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string, JobID: s.jobID, SourceType: s.sourceType, SourceMetadata: metadata, - Verify: s.verify, + SourceVerify: s.verify, } if err := HandleBinary(ctx, gitDir, reporter, chunkSkel, commitHash, fileName, s.skipArchives); err != nil { logger.Error(err, "error handling binary file") @@ -1040,7 +1040,7 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string, SourceType: s.sourceType, SourceMetadata: metadata, Data: data, - Verify: s.verify, + SourceVerify: s.verify, } return reporter.ChunkOk(ctx, chunk) } diff --git a/pkg/sources/git/git_test.go b/pkg/sources/git/git_test.go index a986a5ddc81d..fa5eb8f7b3b4 100644 --- a/pkg/sources/git/git_test.go +++ b/pkg/sources/git/git_test.go @@ -59,9 +59,9 @@ func TestSource_Scan(t *testing.T) { concurrency: 4, }, wantChunk: &sources.Chunk{ - SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, - SourceName: "this repo", - Verify: false, + SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, + SourceName: "this repo", + SourceVerify: false, }, wantErr: false, }, @@ -78,9 +78,9 @@ func TestSource_Scan(t *testing.T) { concurrency: 4, }, wantChunk: &sources.Chunk{ - SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, - SourceName: "test source", - Verify: false, + SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, + SourceName: "test source", + SourceVerify: false, }, wantErr: false, }, @@ -97,9 +97,9 @@ func TestSource_Scan(t *testing.T) { concurrency: 0, }, wantChunk: &sources.Chunk{ - SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, - SourceName: "test source", - Verify: false, + SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, + SourceName: "test source", + SourceVerify: false, }, wantErr: false, }, @@ -119,9 +119,9 @@ func TestSource_Scan(t *testing.T) { concurrency: 4, }, wantChunk: &sources.Chunk{ - SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, - SourceName: "test source", - Verify: false, + SourceType: sourcespb.SourceType_SOURCE_TYPE_GIT, + SourceName: "test source", + SourceVerify: false, }, wantErr: false, }, diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index cf0b8fc79355..ccfb6dcf8c6f 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -1254,8 +1254,8 @@ func (s *Source) chunkGistComments(ctx context.Context, gistURL string, gistInfo }, }, }, - Data: []byte(sanitizer.UTF8(comment.GetBody())), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(comment.GetBody())), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -1391,8 +1391,8 @@ func (s *Source) chunkIssues(ctx context.Context, repoInfo repoInfo, issues []*g }, }, }, - Data: []byte(sanitizer.UTF8(issue.GetTitle() + "\n" + issue.GetBody())), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(issue.GetTitle() + "\n" + issue.GetBody())), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -1458,8 +1458,8 @@ func (s *Source) chunkIssueComments(ctx context.Context, repoInfo repoInfo, comm }, }, }, - Data: []byte(sanitizer.UTF8(comment.GetBody())), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(comment.GetBody())), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -1554,8 +1554,8 @@ func (s *Source) chunkPullRequests(ctx context.Context, repoInfo repoInfo, prs [ }, }, }, - Data: []byte(sanitizer.UTF8(pr.GetTitle() + "\n" + pr.GetBody())), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(pr.GetTitle() + "\n" + pr.GetBody())), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -1590,8 +1590,8 @@ func (s *Source) chunkPullRequestComments(ctx context.Context, repoInfo repoInfo }, }, }, - Data: []byte(sanitizer.UTF8(comment.GetBody())), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(comment.GetBody())), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -1629,7 +1629,7 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Github{Github: meta}, }, - Verify: s.verify, + SourceVerify: s.verify, } u, err := url.Parse(meta.GetLink()) diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index 6e5ce443b9c6..1bbed6305358 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -127,7 +127,7 @@ func TestSource_ScanComments(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, }, @@ -157,7 +157,7 @@ func TestSource_ScanComments(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, }, @@ -342,7 +342,7 @@ func TestSource_Scan(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, }, @@ -367,7 +367,7 @@ func TestSource_Scan(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, }, @@ -453,7 +453,7 @@ func TestSource_Scan(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, }, @@ -502,7 +502,7 @@ func TestSource_Scan(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, minRepo: 1, @@ -596,7 +596,7 @@ func TestSource_paginateGists(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, }, wantErr: false, user: "truffle-sandbox", @@ -994,7 +994,7 @@ func TestSource_ScanCommentsWithGraphql(t *testing.T) { }, }, }, - Verify: false, + SourceVerify: false, } s := Source{} diff --git a/pkg/sources/github/graphql.go b/pkg/sources/github/graphql.go index e7e2f08cf09d..b82a93fe1dd4 100644 --- a/pkg/sources/github/graphql.go +++ b/pkg/sources/github/graphql.go @@ -418,8 +418,8 @@ func (s *Source) chunkGraphqlIssues(ctx context.Context, repoInfo repoInfo, issu }, }, }, - Data: []byte(sanitizer.UTF8(issue.Title + "\n" + issue.Body)), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(issue.Title + "\n" + issue.Body)), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -453,8 +453,8 @@ func (s *Source) chunkComments(ctx context.Context, repoInfo repoInfo, comments }, }, }, - Data: []byte(sanitizer.UTF8(comment.Body)), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(comment.Body)), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { @@ -483,8 +483,8 @@ func (s *Source) chunkGraphqlPullRequests(ctx context.Context, repoInfo repoInfo }, }, }, - Data: []byte(sanitizer.UTF8(pr.Title + "\n" + pr.Body)), - Verify: s.verify, + Data: []byte(sanitizer.UTF8(pr.Title + "\n" + pr.Body)), + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil { diff --git a/pkg/sources/gitlab/gitlab.go b/pkg/sources/gitlab/gitlab.go index b8c9b5fe6728..bf1032554cc6 100644 --- a/pkg/sources/gitlab/gitlab.go +++ b/pkg/sources/gitlab/gitlab.go @@ -361,7 +361,7 @@ func (s *Source) scanTarget(ctx context.Context, client *gitlab.Client, target s SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Gitlab{Gitlab: meta}, }, - Verify: s.verify, + SourceVerify: s.verify, } if err := common.CancellableWrite(ctx, chunksChan, chunk); err != nil { diff --git a/pkg/sources/huggingface/huggingface.go b/pkg/sources/huggingface/huggingface.go index f64688abae19..80c3733ee43e 100644 --- a/pkg/sources/huggingface/huggingface.go +++ b/pkg/sources/huggingface/huggingface.go @@ -663,8 +663,8 @@ func (s *Source) chunkDiscussionComments(ctx context.Context, repoInfo repoInfo, }, }, }, - Data: []byte(comment.Data.Latest.Raw), - Verify: s.verify, + Data: []byte(comment.Data.Latest.Raw), + SourceVerify: s.verify, } select { case <-ctx.Done(): diff --git a/pkg/sources/jenkins/jenkins.go b/pkg/sources/jenkins/jenkins.go index 88a4b627de3d..e51805555d24 100644 --- a/pkg/sources/jenkins/jenkins.go +++ b/pkg/sources/jenkins/jenkins.go @@ -426,7 +426,7 @@ func (s *Source) chunkBuild( }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } ctx.Logger().V(4).Info("scanning build log") diff --git a/pkg/sources/postman/postman.go b/pkg/sources/postman/postman.go index db8e81b26ef4..bed831b95a5c 100644 --- a/pkg/sources/postman/postman.go +++ b/pkg/sources/postman/postman.go @@ -788,7 +788,7 @@ func (s *Source) scanData(ctx context.Context, chunksChan chan *sources.Chunk, d }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } } diff --git a/pkg/sources/s3/s3.go b/pkg/sources/s3/s3.go index f708376aac8d..b3d062637f7b 100644 --- a/pkg/sources/s3/s3.go +++ b/pkg/sources/s3/s3.go @@ -569,7 +569,7 @@ func (s *Source) pageChunker( }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } if err := handlers.HandleFile(ctx, res.Body, chunkSkel, sources.ChanReporter{Ch: chunksChan}); err != nil { diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go index e53465086e37..7b0e58515dc4 100644 --- a/pkg/sources/sources.go +++ b/pkg/sources/sources.go @@ -44,8 +44,8 @@ type Chunk struct { // SourceType is the type of Source that produced the chunk. SourceType sourcespb.SourceType - // Verify specifies whether any secrets in the Chunk should be verified. - Verify bool + // SourceVerify specifies whether this chunk was generated by a source that has verification enabled in its config. + SourceVerify bool } // ChunkingTarget specifies criteria for a targeted chunking process. diff --git a/pkg/sources/stdin/stdin.go b/pkg/sources/stdin/stdin.go index e05dfa1372a6..2097f638a13b 100644 --- a/pkg/sources/stdin/stdin.go +++ b/pkg/sources/stdin/stdin.go @@ -60,7 +60,7 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ . SourceMetadata: &source_metadatapb.MetaData{ Data: &source_metadatapb.MetaData_Stdin{}, }, - Verify: s.verify, + SourceVerify: s.verify, } ctx.Logger().Info("scanning stdin for secrets") diff --git a/pkg/sources/syslog/syslog.go b/pkg/sources/syslog/syslog.go index 62f741f79542..dc1a1af6c446 100644 --- a/pkg/sources/syslog/syslog.go +++ b/pkg/sources/syslog/syslog.go @@ -282,7 +282,7 @@ func (s *Source) monitorConnection(ctx context.Context, conn net.Conn, chunksCha JobID: s.JobID(), SourceMetadata: metadata, Data: input, - Verify: s.verify, + SourceVerify: s.verify, } } } @@ -332,7 +332,7 @@ func (s *Source) acceptUDPConnections(ctx context.Context, netListener net.Packe SourceType: s.syslog.sourceType, SourceMetadata: metadata, Data: input, - Verify: s.verify, + SourceVerify: s.verify, } } } diff --git a/pkg/sources/travisci/travisci.go b/pkg/sources/travisci/travisci.go index a7ead20d0b7e..6065bddd6d76 100644 --- a/pkg/sources/travisci/travisci.go +++ b/pkg/sources/travisci/travisci.go @@ -203,7 +203,7 @@ func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporte }, }, }, - Verify: s.verify, + SourceVerify: s.verify, } if err := reporter.ChunkOk(ctx, chunk); err != nil {