Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/decoders/escaped_unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk {
SecretID: chunk.SecretID,
SourceMetadata: chunk.SourceMetadata,
SourceType: chunk.SourceType,
Verify: chunk.Verify,
SourceVerify: chunk.SourceVerify,
},
}
} else {
Expand Down
10 changes: 6 additions & 4 deletions pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ type detectableChunk struct {
chunk sources.Chunk
decoder detectorspb.DecoderType
wgDoneFn func()
verify bool
}

// verificationOverlapChunk is a decoded chunk that has multiple detectors that match it.
Expand All @@ -792,7 +793,7 @@ func (e *Engine) scannerWorker(ctx context.Context) {

for chunk := range e.ChunksChan() {
startTime := time.Now()
sourceVerify := chunk.Verify
sourceVerify := chunk.SourceVerify
for _, decoder := range e.decoders {
decodeStart := time.Now()
// This copy is needed to preserve the original chunk.Data across multiple decoders.
Expand All @@ -819,12 +820,12 @@ func (e *Engine) scannerWorker(ctx context.Context) {
}

for _, detector := range matchingDetectors {
decoded.Chunk.Verify = e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides)
wgDetect.Add(1)
e.detectableChunksChan <- detectableChunk{
chunk: *decoded.Chunk,
detector: detector,
decoder: decoded.DecoderType,
verify: e.shouldVerifyChunk(sourceVerify, detector, e.detectorVerificationOverrides),
wgDoneFn: wgDetect.Done,
}
}
Expand Down Expand Up @@ -1004,6 +1005,7 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {
chunk: chunk.chunk,
detector: detector,
decoder: chunk.decoder,
verify: false,
wgDoneFn: wgDetect.Done,
},
res,
Expand All @@ -1022,11 +1024,11 @@ func (e *Engine) verificationOverlapWorker(ctx context.Context) {

for _, detector := range detectorKeysWithResults {
wgDetect.Add(1)
chunk.chunk.Verify = e.shouldVerifyChunk(chunk.chunk.Verify, detector, e.detectorVerificationOverrides)
e.detectableChunksChan <- detectableChunk{
chunk: chunk.chunk,
detector: detector,
decoder: chunk.decoder,
verify: e.shouldVerifyChunk(chunk.chunk.SourceVerify, detector, e.detectorVerificationOverrides),
wgDoneFn: wgDetect.Done,
}
}
Expand Down Expand Up @@ -1089,7 +1091,7 @@ func (e *Engine) detectChunk(ctx context.Context, data detectableChunk) {
results, err := e.verificationCache.FromData(
ctx,
data.detector.Detector,
data.chunk.Verify,
data.verify,
data.chunk.SecretID != 0,
matchBytes)
t.Stop()
Expand Down
155 changes: 98 additions & 57 deletions pkg/engine/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1355,76 +1355,114 @@ func (p passthroughDecoder) FromChunk(chunk *sources.Chunk) *decoders.DecodableC

// Type reports the decoder type of the passthrough test decoder, which is the
// fixed value DecoderType(-1).
func (p passthroughDecoder) Type() detectorspb.DecoderType {
	const passthroughType = -1
	return detectorspb.DecoderType(passthroughType)
}

// TestEngine_DetectChunk_UsesVerifyFlag validates that detectChunk correctly forwards detectableChunk.verify to
// detectors.
func TestEngine_DetectChunk_UsesVerifyFlag(t *testing.T) {
ctx := context.Background()

// Arrange: Create a minimal engine.
e := &Engine{
results: make(chan detectors.ResultWithMetadata, 1),
verificationCache: verificationcache.New(nil, &verificationcache.InMemoryMetrics{}),
testCases := []struct {
name string
verify bool
}{
{name: "verify=true", verify: true},
{name: "verify=false", verify: false},
}

// Arrange: Create a detector match. We can't create one directly, so we have to use a minimal A-H core.
ahcore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{passthroughDetector{keywords: []string{"keyword"}}})
detectorMatches := ahcore.FindDetectorMatches([]byte("keyword"))
require.Len(t, detectorMatches, 1)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Arrange: Create a minimal engine.
e := &Engine{
results: make(chan detectors.ResultWithMetadata, 1),
verificationCache: verificationcache.New(nil, &verificationcache.InMemoryMetrics{}),
}

// Arrange: Create a chunk to detect.
chunk := detectableChunk{
chunk: sources.Chunk{
Verify: true,
},
detector: detectorMatches[0],
wgDoneFn: func() {},
}
// Arrange: Create a detector match. We can't create one directly, so we have to use a minimal A-H core.
ahcore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{passthroughDetector{keywords: []string{"keyword"}}})
detectorMatches := ahcore.FindDetectorMatches([]byte("keyword"))
require.Len(t, detectorMatches, 1)

// Arrange: Create a chunk to detect.
chunk := detectableChunk{
detector: detectorMatches[0],
verify: tc.verify,
wgDoneFn: func() {},
}

// Act
e.detectChunk(ctx, chunk)
close(e.results)
// Act
e.detectChunk(ctx, chunk)
close(e.results)

// Assert: Confirm that a result was generated and that it has the expected verify flag.
select {
case result := <-e.results:
assert.True(t, result.Result.Verified)
default:
t.Errorf("expected a result but did not get one")
// Assert: Confirm that a result was generated and that it has the expected verify flag.
select {
case result := <-e.results:
assert.Equal(t, tc.verify, result.Result.Verified)
default:
t.Errorf("expected a result but did not get one")
}
})
}
}

// TestEngine_ScannerWorker_DetectableChunkHasCorrectVerifyFlag validates that scannerWorker generates detectableChunk
// structs that have the correct verify flag set. It also validates that the original chunks' SourceVerify flags are
// unchanged.
func TestEngine_ScannerWorker_DetectableChunkHasCorrectVerifyFlag(t *testing.T) {
ctx := context.Background()

// Arrange: Create a minimal engine.
detector := &passthroughDetector{keywords: []string{"keyword"}}
e := &Engine{
AhoCorasickCore: ahocorasick.NewAhoCorasickCore([]detectors.Detector{detector}),
decoders: []decoders.Decoder{passthroughDecoder{}},
detectableChunksChan: make(chan detectableChunk, 1),
sourceManager: sources.NewManager(),
verify: true,
testCases := []struct {
name string
engineVerify bool
sourceVerify bool
wantVerify bool
}{
{name: "engineVerify=false,sourceVerify=false", engineVerify: false, sourceVerify: false, wantVerify: false},
{name: "engineVerify=false,sourceVerify=true", engineVerify: false, sourceVerify: true, wantVerify: false},
{name: "engineVerify=true,sourceVerify=false", engineVerify: true, sourceVerify: false, wantVerify: false},
{name: "engineVerify=true,sourceVerify=true", engineVerify: true, sourceVerify: true, wantVerify: true},
}

// Arrange: Create a chunk to scan.
chunk := sources.Chunk{
Data: []byte("keyword"),
Verify: true,
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Arrange: Create a minimal engine.
detector := &passthroughDetector{keywords: []string{"keyword"}}
e := &Engine{
AhoCorasickCore: ahocorasick.NewAhoCorasickCore([]detectors.Detector{detector}),
decoders: []decoders.Decoder{passthroughDecoder{}},
detectableChunksChan: make(chan detectableChunk, 1),
sourceManager: sources.NewManager(),
verify: tc.engineVerify,
}

// Arrange: Create a chunk to scan.
chunk := sources.Chunk{
Data: []byte("keyword"),
SourceVerify: tc.sourceVerify,
}

// Arrange: Enqueue a chunk to be scanned.
e.sourceManager.ScanChunk(&chunk)
// Arrange: Enqueue a chunk to be scanned.
e.sourceManager.ScanChunk(&chunk)

// Act
go e.scannerWorker(ctx)
// Act
go e.scannerWorker(ctx)

// Assert: Confirm that a chunk was generated and that it has the expected verify flag.
select {
case chunk := <-e.detectableChunksChan:
assert.True(t, chunk.chunk.Verify)
case <-time.After(1 * time.Second):
t.Errorf("expected a detectableChunk but did not get one")
// Assert: Confirm that a chunk was generated, that its SourceVerify flag is unchanged, and that its verify
// flag is correctly set.
select {
case chunk := <-e.detectableChunksChan:
assert.Equal(t, tc.sourceVerify, chunk.chunk.SourceVerify)
assert.Equal(t, tc.wantVerify, chunk.verify)
case <-time.After(1 * time.Second):
t.Errorf("expected a detectableChunk but did not get one")
}
})
}
}

// TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag validates that the results directly
// generated by verificationOverlapWorker all came from detector invocations with the verify flag cleared (because these
// results were generated from verification overlaps). It also validates that detectableChunk structs generated by
// verificationOverlapWorker have their verify flags correctly set, and that these structs' original chunks'
// SourceVerify flags are unchanged.
func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t *testing.T) {
ctx := context.Background()

Expand All @@ -1450,8 +1488,8 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t

// Arrange: Create a chunk to "scan."
chunk := sources.Chunk{
Data: []byte("keyword ;oahpow8heg;blaisd"),
Verify: true,
Data: []byte("keyword ;oahpow8heg;blaisd"),
SourceVerify: true,
}

// Arrange: Create overlapping detector matches. We can't create them directly, so we have to use a minimal A-H
Expand Down Expand Up @@ -1481,11 +1519,13 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t
assert.False(t, result.Result.Verified)
}

// Assert: Confirm that every generated detectable chunk carries the original Verify flag.
// Assert: Confirm that every generated detectable chunk's Chunk.SourceVerify flag is unchanged and that its
// verify flag is correctly set.
// CMR: There should not be any of these chunks. However, due to what I believe is an unrelated bug, there
// are. This test ensures that even in that erroneous case, their Verify flag is correct.
// are. This test ensures that even in that erroneous case, their contents are correct.
for detectableChunk := range processedDetectableChunks {
assert.True(t, detectableChunk.chunk.Verify)
assert.True(t, detectableChunk.verify)
assert.True(t, detectableChunk.chunk.SourceVerify)
}
})
t.Run("no overlap", func(t *testing.T) {
Expand All @@ -1509,8 +1549,8 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t

// Arrange: Create a chunk to "scan."
chunk := sources.Chunk{
Data: []byte("keyword ;oahpow8heg;blaisd"),
Verify: true,
Data: []byte("keyword ;oahpow8heg;blaisd"),
SourceVerify: true,
}

// Arrange: Create non-overlapping detector matches. We can't create them directly, so we have to use a minimal
Expand All @@ -1534,9 +1574,10 @@ func TestEngine_VerificationOverlapWorker_DetectableChunkHasCorrectVerifyFlag(t
close(e.detectableChunksChan)
close(processedDetectableChunks)

// Assert: Confirm that every generated detectable chunk carries the original Verify flag.
// Assert: Confirm that SourceVerify flags are unchanged, and verify flags are correctly set.
for detectableChunk := range processedDetectableChunks {
assert.True(t, detectableChunk.chunk.Verify)
assert.True(t, detectableChunk.chunk.SourceVerify)
assert.True(t, detectableChunk.verify)
}
})
}
2 changes: 1 addition & 1 deletion pkg/sources/circleci/circleci.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (s *Source) chunk(ctx context.Context, proj project, buildNum BuildNum, ste
},
},
},
Verify: s.verify,
SourceVerify: s.verify,
}
if err := data.Error(); err != nil {
return err
Expand Down
6 changes: 3 additions & 3 deletions pkg/sources/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ func (s *Source) processHistoryEntry(ctx context.Context, historyInfo historyEnt
},
},
},
Verify: s.verify,
Data: []byte(historyInfo.entry.CreatedBy),
SourceVerify: s.verify,
Data: []byte(historyInfo.entry.CreatedBy),
}

ctx.Logger().V(2).Info("scanning image history entry", "index", historyInfo.index, "layer", historyInfo.layerDigest)
Expand Down Expand Up @@ -455,7 +455,7 @@ func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chu
},
},
},
Verify: s.verify,
SourceVerify: s.verify,
}
chunk.Data = data.Bytes()

Expand Down
2 changes: 1 addition & 1 deletion pkg/sources/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func (s *Source) Chunks(
},
},
},
Verify: s.verify,
SourceVerify: s.verify,
}

chunk.Data = []byte(document.message)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sources/filesystem/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
},
},
},
Verify: s.verify,
SourceVerify: s.verify,
}

return handlers.HandleFile(fileCtx, inputFile, chunkSkel, sources.ChanReporter{Ch: chunksChan})
Expand Down
10 changes: 5 additions & 5 deletions pkg/sources/gcs/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,11 @@ func (s *Source) completeProgress(ctx context.Context) {

func (s *Source) processObject(ctx context.Context, o object) error {
chunkSkel := &sources.Chunk{
SourceName: s.name,
SourceType: s.Type(),
JobID: s.JobID(),
SourceID: s.sourceId,
Verify: s.verify,
SourceName: s.name,
SourceType: s.Type(),
JobID: s.JobID(),
SourceID: s.sourceId,
SourceVerify: s.verify,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Gcs{
Gcs: &source_metadatapb.GCS{
Expand Down
16 changes: 8 additions & 8 deletions pkg/sources/gcs/gcs_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ func TestChunks_PublicBucket(t *testing.T) {

want := []*sources.Chunk{
{
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
Verify: true,
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
SourceVerify: true,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Gcs{
Gcs: &source_metadatapb.GCS{
Expand Down Expand Up @@ -164,10 +164,10 @@ func createTestChunks() []*sources.Chunk {
chunks := make([]*sources.Chunk, 0, len(objects))
for _, o := range objects {
chunks = append(chunks, &sources.Chunk{
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
Verify: true,
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
SourceVerify: true,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Gcs{
Gcs: &source_metadatapb.GCS{
Expand Down
10 changes: 5 additions & 5 deletions pkg/sources/gcs/gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,11 @@ func createTestObject(id int) object {

func createTestSourceChunk(id int) *sources.Chunk {
return &sources.Chunk{
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
Verify: true,
Data: []byte(fmt.Sprintf("hello world %d", id)),
SourceName: "test",
SourceType: sourcespb.SourceType_SOURCE_TYPE_GCS,
SourceID: 0,
SourceVerify: true,
Data: []byte(fmt.Sprintf("hello world %d", id)),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Gcs{
Gcs: &source_metadatapb.GCS{
Expand Down
Loading
Loading