From 6953bad3b3d7584a2030c851203b7a065b10e2d5 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Fri, 3 Feb 2023 13:45:05 -0500
Subject: [PATCH 01/27] sketching out ideas

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/engine.go                      |  2 ++
 pkg/traceql/storage.go                     | 17 ++++++++++++-----
 tempodb/encoding/vparquet/block_traceql.go |  5 ++++-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index 1b18dde4719..3a351c6cb77 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -82,6 +82,8 @@ iter:
 		}
 	}
 
+	// jpe iterate over all matching spansets and request metadata
+
 	span.SetTag("spansets_evaluated", spansetsEvaluated)
 	span.SetTag("spansets_found", len(res.Traces))
 
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index e758eb895b2..496f38a4156 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -31,21 +31,28 @@ func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 }
 
 type Span struct {
+	// jpe need rownumber to go fetch metadata later?
+	Attributes map[Attribute]Static
+}
+
+type Spanset struct {
+	Scalar Static
+	Spans  []Span
+}
+
+type SpanMetadata struct {
 	ID                 []byte
 	StartTimeUnixNanos uint64
 	EndtimeUnixNanos   uint64
-	Attributes         map[Attribute]Static
 }
 
-type Spanset struct {
+type SpansetMetadata struct {
 	TraceID            []byte
 	RootSpanName       string
 	RootServiceName    string
 	StartTimeUnixNanos uint64
 	DurationNanos      uint64
-	Attributes         map[Attribute]Static
-	Scalar             Static
-	Spans              []Span
+	Span               []SpanMetadata
 }
 
 type SpansetIterator interface {
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index d8d6b92a65f..91978b287b8 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -459,7 +459,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath]))
 	}
 
-	// Time range filtering?
+	// Time range filtering? jpe - move up to trace level?
 	var startFilter, endFilter parquetquery.Predicate
 	if start > 0 && end > 0 {
 		// Here's how we detect the span overlaps the time window:
@@ -512,6 +512,9 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	required = append(required, makeIter(columnPathSpanEndTime, endFilter, columnPathSpanEndTime))
 	required = append(required, makeIter(columnPathSpanID, nil, columnPathSpanID))
 
+	// jpe if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
+	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
+
 	// Left join here means the span id/start/end iterators + 1 are required,
 	// and all other conditions are optional. Whatever matches is returned.
 	return parquetquery.NewLeftJoinIterator(DefinitionLevelResourceSpansILSSpan, required, iters, spanCol), nil

From e871134f2285124bd18af77dcf413200d8cd063f Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Fri, 3 Feb 2023 14:44:27 -0500
Subject: [PATCH 02/27] wip: traceql engine two pass

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/ast_execute_test.go            | 48 +++++++++++-----------
 pkg/traceql/ast_test.go                    | 34 +++++++--------
 pkg/traceql/engine.go                      | 40 +++++++++++-------
 pkg/traceql/engine_test.go                 | 21 ++++------
 pkg/traceql/storage.go                     | 13 +++++-
 tempodb/encoding/vparquet/block_traceql.go |  3 +-
 6 files changed, 89 insertions(+), 70 deletions(-)

diff --git a/pkg/traceql/ast_execute_test.go b/pkg/traceql/ast_execute_test.go
index a09f17809ef..7fd056ca990 100644
--- a/pkg/traceql/ast_execute_test.go
+++ b/pkg/traceql/ast_execute_test.go
@@ -157,18 +157,18 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// This spanset will be kept because it satisfies both conditions
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// This spanset will be dropped
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},
@@ -176,22 +176,22 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 			"{ .foo = `a` } || { .foo = `b` }",
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// Second span will be dropped
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
-					{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
 				}},
 			},
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},
@@ -222,21 +222,21 @@ func TestScalarFilterEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// This has 1 match
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// This has 2 matches
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 				}},
 			},
 			[]Spanset{
 				{
 					Scalar: NewStaticInt(2),
 					Spans: []Span{
-						{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-						{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					},
 				},
 			},
@@ -246,22 +246,22 @@ func TestScalarFilterEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// Avg duration = 5ms
-					{ID: []byte{1}, Attributes: map[Attribute]Static{
+					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(2 * time.Millisecond)},
 					},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{
+					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(8 * time.Millisecond)},
 					},
 				}},
 				{Spans: []Span{
 					// Avg duration = 10ms
-					{ID: []byte{3}, Attributes: map[Attribute]Static{
+					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(5 * time.Millisecond)},
 					},
-					{ID: []byte{4}, Attributes: map[Attribute]Static{
+					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(15 * time.Millisecond)},
 					},
@@ -273,11 +273,11 @@ func TestScalarFilterEvaluate(t *testing.T) {
 					// avg(duration) should probably return a Duration instead of a float.
 					Scalar: NewStaticFloat(10.0 * float64(time.Millisecond)),
 					Spans: []Span{
-						{ID: []byte{3}, Attributes: map[Attribute]Static{
+						{Attributes: map[Attribute]Static{
 							NewAttribute("foo"):             NewStaticString("a"),
 							NewIntrinsic(IntrinsicDuration): NewStaticDuration(5 * time.Millisecond)},
 						},
-						{ID: []byte{4}, Attributes: map[Attribute]Static{
+						{Attributes: map[Attribute]Static{
 							NewAttribute("foo"):             NewStaticString("a"),
 							NewIntrinsic(IntrinsicDuration): NewStaticDuration(15 * time.Millisecond)},
 						},
diff --git a/pkg/traceql/ast_test.go b/pkg/traceql/ast_test.go
index 19f73ab7ec4..9075e1ac4a1 100644
--- a/pkg/traceql/ast_test.go
+++ b/pkg/traceql/ast_test.go
@@ -127,13 +127,13 @@ func TestPipelineEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// First span should be dropped here
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}}}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}}}},
 			},
 		},
 	}
@@ -171,17 +171,17 @@ func TestSpansetFilterEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// Second span should be dropped here
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// This entire spanset will be dropped
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}}}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}}}},
 			},
 		},
 		{
@@ -189,27 +189,27 @@ func TestSpansetFilterEvaluate(t *testing.T) {
 			[]Spanset{
 				{Spans: []Span{
 					// Second span should be dropped here
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2)}},
 				}},
 				{Spans: []Span{
 					// First span should be dropped here
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(3)}},
-					{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
-					{ID: []byte{5}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(3)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
 				{Spans: []Span{
 					// Entire spanset should be dropped
-					{ID: []byte{3}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(6)}},
-					{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(7)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(6)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(7)}},
 				}},
 			},
 			[]Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}}}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}}}},
 				{Spans: []Span{
-					{ID: []byte{4}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
-					{ID: []byte{5}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
+					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
 			},
 		},
diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index 3a351c6cb77..16de463903f 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -42,15 +42,9 @@ func (e *Engine) Execute(ctx context.Context, searchReq *tempopb.SearchRequest,
 	}
 	iterator := fetchSpansResponse.Results
 
-	res := &tempopb.SearchResponse{
-		Traces: nil,
-		// TODO capture and update metrics
-		Metrics: &tempopb.SearchMetrics{},
-	}
-
 	spansetsEvaluated := 0
 
-iter:
+	results := []Spanset{}
 	for {
 		spanSet, err := iterator.Next(ctx)
 		if err != nil {
@@ -73,16 +67,34 @@ iter:
 			continue
 		}
 
-		for _, spanSet := range ss {
-			res.Traces = append(res.Traces, e.asTraceSearchMetadata(spanSet))
+		results = append(results, ss...)
 
-			if len(res.Traces) == int(searchReq.Limit) {
-				break iter
-			}
+		if len(results) == int(searchReq.Limit) {
+			break
 		}
 	}
 
-	// jpe iterate over all matching spansets and request metadata
+	if len(results) == 0 {
+		return &tempopb.SearchResponse{
+			Traces: nil,
+			// TODO capture and update metrics
+			Metrics: &tempopb.SearchMetrics{},
+		}, nil
+	}
+
+	ss, err := spanSetFetcher.FetchMetadata(ctx, results)
+	if err != nil {
+		return nil, err
+	}
+
+	res := &tempopb.SearchResponse{
+		Traces: nil,
+		// TODO capture and update metrics
+		Metrics: &tempopb.SearchMetrics{},
+	}
+	for _, spanSet := range ss {
+		res.Traces = append(res.Traces, e.asTraceSearchMetadata(spanSet))
+	}
 
 	span.SetTag("spansets_evaluated", spansetsEvaluated)
 	span.SetTag("spansets_found", len(res.Traces))
@@ -114,7 +126,7 @@ func (e *Engine) createFetchSpansRequest(searchReq *tempopb.SearchRequest, pipel
 	return req
 }
 
-func (e *Engine) asTraceSearchMetadata(spanset Spanset) *tempopb.TraceSearchMetadata {
+func (e *Engine) asTraceSearchMetadata(spanset SpansetMetadata) *tempopb.TraceSearchMetadata {
 	metadata := &tempopb.TraceSearchMetadata{
 		TraceID:           util.TraceIDToHexString(spanset.TraceID),
 		RootServiceName:   spanset.RootServiceName,
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index ab12dedee02..dedb4648160 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -19,6 +19,8 @@ import (
 	"github.com/grafana/tempo/pkg/util"
 )
 
+// jpe - everywhere you removed ID replace with span rownumbers
+
 func TestEngine_Execute(t *testing.T) {
 	now := time.Now()
 	e := Engine{}
@@ -30,20 +32,13 @@ func TestEngine_Execute(t *testing.T) {
 		iterator: &MockSpanSetIterator{
 			results: []*Spanset{
 				{
-					TraceID:         []byte{1},
-					RootSpanName:    "HTTP GET",
-					RootServiceName: "my-service",
 					Spans: []Span{
 						{
-							ID: []byte{1},
 							Attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 							},
 						},
 						{
-							ID:                 []byte{2},
-							StartTimeUnixNanos: uint64(now.UnixNano()),
-							EndtimeUnixNanos:   uint64(now.Add(100 * time.Millisecond).UnixNano()),
 							Attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 								NewAttribute("bar"): NewStaticString("value"),
@@ -52,12 +47,8 @@ func TestEngine_Execute(t *testing.T) {
 					},
 				},
 				{
-					TraceID:         []byte{2},
-					RootSpanName:    "HTTP POST",
-					RootServiceName: "my-service",
 					Spans: []Span{
 						{
-							ID: []byte{3},
 							Attributes: map[Attribute]Static{
 								NewAttribute("bar"): NewStaticString("value"),
 							},
@@ -143,13 +134,13 @@ func TestEngine_asTraceSearchMetadata(t *testing.T) {
 	spanID1 := traceID[:8]
 	spanID2 := traceID[8:]
 
-	spanSet := Spanset{
+	spanSet := SpansetMetadata{
 		TraceID:            traceID,
 		RootServiceName:    "my-service",
 		RootSpanName:       "HTTP GET",
 		StartTimeUnixNanos: 1000,
 		DurationNanos:      uint64(time.Second.Nanoseconds()),
-		Spans: []Span{
+		Spans: []SpanMetadata{
 			{
 				ID:                 spanID1,
 				StartTimeUnixNanos: uint64(now.UnixNano()),
@@ -266,6 +257,10 @@ func (m *MockSpanSetFetcher) Fetch(ctx context.Context, request FetchSpansReques
 	}, nil
 }
 
+func (m *MockSpanSetFetcher) FetchMetadata(context.Context, []Spanset) ([]SpansetMetadata, error) {
+	return nil, fmt.Errorf("not implemented") // jpe test?
+}
+
 type MockSpanSetIterator struct {
 	results []*Spanset
 }
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index 496f38a4156..7638cdf3e84 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -2,6 +2,9 @@ package traceql
 
 import (
 	"context"
+	"fmt"
+
+	"github.com/grafana/tempo/pkg/parquetquery"
 )
 
 type Operands []Static
@@ -32,6 +35,7 @@ func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 
 type Span struct {
 	// jpe need rownumber to go fetch metadata later?
+	rowNum     parquetquery.RowNumber
 	Attributes map[Attribute]Static
 }
 
@@ -44,6 +48,7 @@ type SpanMetadata struct {
 	ID                 []byte
 	StartTimeUnixNanos uint64
 	EndtimeUnixNanos   uint64
+	Attributes         map[Attribute]Static // jpe copy from Span in the fetch part?
 }
 
 type SpansetMetadata struct {
@@ -52,7 +57,7 @@ type SpansetMetadata struct {
 	RootServiceName    string
 	StartTimeUnixNanos uint64
 	DurationNanos      uint64
-	Span               []SpanMetadata
+	Spans              []SpanMetadata
 }
 
 type SpansetIterator interface {
@@ -66,6 +71,7 @@ type FetchSpansResponse struct {
 
 type SpansetFetcher interface {
 	Fetch(context.Context, FetchSpansRequest) (FetchSpansResponse, error)
+	FetchMetadata(context.Context, []Spanset) ([]SpansetMetadata, error) // jpe review this method signature
 }
 
 // MustExtractFetchSpansRequest parses the given traceql query and returns
@@ -107,3 +113,8 @@ func NewSpansetFetcherWrapper(f func(ctx context.Context, req FetchSpansRequest)
 func (s SpansetFetcherWrapper) Fetch(ctx context.Context, request FetchSpansRequest) (FetchSpansResponse, error) {
 	return s.f(ctx, request)
 }
+
+func (s SpansetFetcherWrapper) FetchMetadata(ctx context.Context, spansets []Spanset) ([]SpansetMetadata, error) {
+	// jpe implement
+	return nil, fmt.Errorf("not implemented")
+}
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 91978b287b8..65f5c798260 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -513,7 +513,8 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	required = append(required, makeIter(columnPathSpanID, nil, columnPathSpanID))
 
 	// jpe if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
-	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
+	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow] aggregates such as "count" to be computed
+	//  how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
 
 	// Left join here means the span id/start/end iterators + 1 are required,
 	// and all other conditions are optional. Whatever matches is returned.

From 81b68bc3ccbeb6a18c9ac8fcda9d291d07d09354 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 6 Feb 2023 08:36:47 -0500
Subject: [PATCH 03/27] inteface cleanup

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/common/interfaces.go |  2 ++
 tempodb/encoding/v2/backend_block.go  |  7 +++++--
 tempodb/encoding/v2/wal_block.go      |  5 +++++
 tempodb/wal/wal_test.go               | 11 +++++++----
 4 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/tempodb/encoding/common/interfaces.go b/tempodb/encoding/common/interfaces.go
index 5621b639b29..7f1f8089403 100644
--- a/tempodb/encoding/common/interfaces.go
+++ b/tempodb/encoding/common/interfaces.go
@@ -25,7 +25,9 @@ type Searcher interface {
 	SearchTags(ctx context.Context, cb TagCallback, opts SearchOptions) error
 	SearchTagValues(ctx context.Context, tag string, cb TagCallback, opts SearchOptions) error
 	SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb TagCallbackV2, opts SearchOptions) error
+
 	Fetch(context.Context, traceql.FetchSpansRequest, SearchOptions) (traceql.FetchSpansResponse, error)
+	FetchMetadata(context.Context, []traceql.Spanset, SearchOptions) ([]traceql.SpansetMetadata, error) // jpe review this method signature - i assume this is necessary?
 }
 
 type CacheControl struct {
diff --git a/tempodb/encoding/v2/backend_block.go b/tempodb/encoding/v2/backend_block.go
index 8be4e155608..3b9aa889f8c 100644
--- a/tempodb/encoding/v2/backend_block.go
+++ b/tempodb/encoding/v2/backend_block.go
@@ -3,7 +3,6 @@ package v2
 import (
 	"bytes"
 	"context"
-	"errors"
 	"fmt"
 	"io"
 
@@ -249,5 +248,9 @@ func search(decoder model.ObjectDecoder, maxBytes int, id common.ID, obj []byte,
 }
 
 func (b *BackendBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.SearchOptions) (traceql.FetchSpansResponse, error) {
-	return traceql.FetchSpansResponse{}, errors.New("Unsupported")
+	return traceql.FetchSpansResponse{}, common.ErrUnsupported
+}
+
+func (b *BackendBlock) FetchMetadata(context.Context, []traceql.Spanset, common.SearchOptions) ([]traceql.SpansetMetadata, error) {
+	return nil, common.ErrUnsupported
 }
diff --git a/tempodb/encoding/v2/wal_block.go b/tempodb/encoding/v2/wal_block.go
index eb6f0cecabb..a26402b0cef 100644
--- a/tempodb/encoding/v2/wal_block.go
+++ b/tempodb/encoding/v2/wal_block.go
@@ -285,6 +285,11 @@ func (a *walBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.Sear
 	return traceql.FetchSpansResponse{}, common.ErrUnsupported
 }
 
+// FetchMetadata implements traceql.SpansetFetcher
+func (b *walBlock) FetchMetadata(context.Context, []traceql.Spanset, common.SearchOptions) ([]traceql.SpansetMetadata, error) {
+	return nil, common.ErrUnsupported
+}
+
 func (a *walBlock) fullFilename() string {
 	filename := a.fullFilenameSeparator("+")
 	_, e1 := os.Stat(filename)
diff --git a/tempodb/wal/wal_test.go b/tempodb/wal/wal_test.go
index 306fbfb2888..444c1cc8a41 100644
--- a/tempodb/wal/wal_test.go
+++ b/tempodb/wal/wal_test.go
@@ -291,17 +291,20 @@ func testFetch(t *testing.T, e encoding.VersionedEncoding) {
 			}
 			require.NoError(t, err)
 
-			// grab the first result and make sure it's the expected id
+			// grab the first result
 			ss, err := resp.Results.Next(ctx)
 			require.NoError(t, err)
 
-			expectedID := ids[i]
-			require.Equal(t, ss.TraceID, expectedID)
-
 			// confirm no more matches
 			ss, err = resp.Results.Next(ctx)
 			require.NoError(t, err)
 			require.Nil(t, ss)
+
+			// confirm traceid matches
+			ssmeta, err := block.FetchMetadata(ctx, []traceql.Spanset{*ss}, common.DefaultSearchOptions())
+			expectedID := ids[i]
+			require.Equal(t, 1, len(ssmeta))
+			require.Equal(t, ssmeta[0].TraceID, expectedID)
 		}
 	})
 }

From a7afed4a90951b7b77504187f6c17cb4b8bb33d7 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 6 Feb 2023 09:46:48 -0500
Subject: [PATCH 04/27] technically compiles

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/storage.go                        |   2 +-
 .../encoding/vparquet/block_findtracebyid.go  |   3 +
 tempodb/encoding/vparquet/block_traceql.go    |  74 +++---
 .../encoding/vparquet/block_traceql_meta.go   | 216 ++++++++++++++++++
 .../encoding/vparquet/block_traceql_test.go   |  55 ++---
 tempodb/encoding/vparquet/wal_block.go        |  20 ++
 6 files changed, 286 insertions(+), 84 deletions(-)
 create mode 100644 tempodb/encoding/vparquet/block_traceql_meta.go

diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index 7638cdf3e84..f5ff1f6f110 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -35,7 +35,7 @@ func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 
 type Span struct {
 	// jpe need rownumber to go fetch metadata later?
-	rowNum     parquetquery.RowNumber
+	RowNum     parquetquery.RowNumber
 	Attributes map[Attribute]Static
 }
 
diff --git a/tempodb/encoding/vparquet/block_findtracebyid.go b/tempodb/encoding/vparquet/block_findtracebyid.go
index e30f73767c6..8e782eb4d29 100644
--- a/tempodb/encoding/vparquet/block_findtracebyid.go
+++ b/tempodb/encoding/vparquet/block_findtracebyid.go
@@ -178,6 +178,9 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
 		return nil, nil
 	}
 
+	// jpe - two pass search might break due to this. confirm by writing a test that searches for spans in
+	//  row groups besides the first one.
+
 	// The row number coming out of the iterator is relative,
 	// so offset it using the num rows in all previous groups
 	rowMatch := int64(0)
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 65f5c798260..4a73b9d79f8 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -3,7 +3,6 @@ package vparquet
 import (
 	"context"
 	"fmt"
-	"math"
 	"reflect"
 	"time"
 
@@ -45,6 +44,7 @@ const (
 	columnPathSpanName      = "rs.ils.Spans.Name"
 	columnPathSpanStartTime = "rs.ils.Spans.StartUnixNanos"
 	columnPathSpanEndTime   = "rs.ils.Spans.EndUnixNanos"
+	columnPathSpanKind      = "rs.ils.Spans.Kind"
 	//columnPathSpanDuration       = "rs.ils.Spans.DurationNanos"
 	columnPathSpanStatusCode     = "rs.ils.Spans.StatusCode"
 	columnPathSpanAttrKey        = "rs.ils.Spans.Attrs.Key"
@@ -212,6 +212,10 @@ func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.Spanset, erro
 	return spanset, nil
 }
 
+// jpe - review iterators and how they are built here. there may be subtle implications on the span/attribute levels
+// imposed here that are no longer valid b/c of the two pass change. i.e. we no longer have to always grab span level
+// data.
+
 // fetch is the core logic for executing the given conditions against the parquet columns. The algorithm
 // can be summarized as a hiearchy of iterators where we iterate related columns together and collect the results
 // at each level into attributes, spans, and spansets.  Each condition (.foo=bar) is pushed down to the one or more
@@ -410,6 +414,10 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 				return nil, err
 			}
 			durationPredicates = append(durationPredicates, pred)
+			addPredicate(columnPathSpanStartTime, nil)
+			addPredicate(columnPathSpanEndTime, nil)
+			columnSelectAs[columnPathSpanStartTime] = columnPathSpanStartTime // jpe what does this do?
+			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime
 			continue
 
 		case traceql.IntrinsicStatus:
@@ -459,16 +467,6 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath]))
 	}
 
-	// Time range filtering? jpe - move up to trace level?
-	var startFilter, endFilter parquetquery.Predicate
-	if start > 0 && end > 0 {
-		// Here's how we detect the span overlaps the time window:
-		// Span start <= req.End
-		// Span end >= req.Start
-		startFilter = parquetquery.NewIntBetweenPredicate(0, int64(end))
-		endFilter = parquetquery.NewIntBetweenPredicate(int64(start), math.MaxInt64)
-	}
-
 	var required []parquetquery.Iterator
 
 	minCount := 0
@@ -505,16 +503,14 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 		iters = nil
 	}
 
-	// Static columns that are always loaded
-	// Since these are always present, they are put at the end and will only
-	// be read once the core conditions are met.
-	required = append(required, makeIter(columnPathSpanStartTime, startFilter, columnPathSpanStartTime))
-	required = append(required, makeIter(columnPathSpanEndTime, endFilter, columnPathSpanEndTime))
-	required = append(required, makeIter(columnPathSpanID, nil, columnPathSpanID))
-
 	// jpe if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
-	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow] aggregates such as "count" to be computed
+	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
 	//  how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
+	if len(iters)+len(required) == 0 {
+		required = []parquetquery.Iterator{makeIter(columnPathSpanKind, nil, columnPathSpanKind)}
+	}
+
+	// jpe - should all iterators be required?
 
 	// Left join here means the span id/start/end iterators + 1 are required,
 	// and all other conditions are optional. Whatever matches is returned.
@@ -616,15 +612,10 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
 		required, iters, batchCol), nil
 }
 
+// jpe - modify to impose trace level time range condition
 func createTraceIterator(makeIter makeIterFn, resourceIter parquetquery.Iterator) parquetquery.Iterator {
 	traceIters := []parquetquery.Iterator{
 		resourceIter,
-		// Add static columns that are always return
-		makeIter(columnPathTraceID, nil, columnPathTraceID),
-		makeIter(columnPathStartTimeUnixNano, nil, columnPathStartTimeUnixNano),
-		makeIter(columnPathDurationNanos, nil, columnPathDurationNanos),
-		makeIter(columnPathRootSpanName, nil, columnPathRootSpanName),
-		makeIter(columnPathRootServiceName, nil, columnPathRootServiceName),
 	}
 
 	// Final trace iterator
@@ -933,25 +924,30 @@ type spanCollector struct {
 
 var _ parquetquery.GroupPredicate = (*spanCollector)(nil)
 
+// jpe - how do we restrict to 3 spans per trace?
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	span := &traceql.Span{
 		Attributes: make(map[traceql.Attribute]traceql.Static),
+		RowNum:     res.RowNumber,
 	}
 
-	for _, e := range res.OtherEntries {
+	for _, e := range res.OtherEntries { // jpe - this is likely not going to be copied through correctly
 		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
 	}
 
+	var startTimeUnixNanos, endTimeUnixNanos uint64
+
 	// Merge all individual columns into the span
 	for _, kv := range res.Entries {
 		switch kv.Key {
-		case columnPathSpanID:
-			span.ID = kv.Value.ByteArray()
+		// jpe  move to spansetmeta collector?
+		// case columnPathSpanID:
+		// 	span.ID = kv.Value.ByteArray()
 		case columnPathSpanStartTime:
-			span.StartTimeUnixNanos = kv.Value.Uint64()
+			startTimeUnixNanos = kv.Value.Uint64()
 		case columnPathSpanEndTime:
-			span.EndtimeUnixNanos = kv.Value.Uint64()
+			endTimeUnixNanos = kv.Value.Uint64()
 		case columnPathSpanName:
 			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
 		//case columnPathSpanDuration:
@@ -988,8 +984,9 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Save computed duration if any filters present and at least one is passed.
+	//    jpe move to spansetmeta collector?
 	if len(c.durationFilters) > 0 {
-		duration := span.EndtimeUnixNanos - span.StartTimeUnixNanos
+		duration := endTimeUnixNanos - startTimeUnixNanos
 		for _, f := range c.durationFilters {
 			if f == nil || f.Fn(int64(duration)) {
 				span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(duration))
@@ -1124,21 +1121,6 @@ var _ parquetquery.GroupPredicate = (*traceCollector)(nil)
 func (c *traceCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	finalSpanset := &traceql.Spanset{}
 
-	for _, e := range res.Entries {
-		switch e.Key {
-		case columnPathTraceID:
-			finalSpanset.TraceID = e.Value.ByteArray()
-		case columnPathStartTimeUnixNano:
-			finalSpanset.StartTimeUnixNanos = e.Value.Uint64()
-		case columnPathDurationNanos:
-			finalSpanset.DurationNanos = e.Value.Uint64()
-		case columnPathRootSpanName:
-			finalSpanset.RootSpanName = e.Value.String()
-		case columnPathRootServiceName:
-			finalSpanset.RootServiceName = e.Value.String()
-		}
-	}
-
 	for _, e := range res.OtherEntries {
 		if spanset, ok := e.Value.(*traceql.Spanset); ok {
 			finalSpanset.Spans = append(finalSpanset.Spans, spanset.Spans...)
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
new file mode 100644
index 00000000000..4fc9ed09e23
--- /dev/null
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -0,0 +1,216 @@
+package vparquet
+
+import (
+	"context"
+	"time"
+
+	"github.com/grafana/tempo/pkg/parquetquery"
+	v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
+	"github.com/grafana/tempo/pkg/traceql"
+	"github.com/grafana/tempo/tempodb/encoding/common"
+	"github.com/segmentio/parquet-go"
+)
+
+// FetchMetadata for the given spansets from the block.
+func (b *backendBlock) FetchMetadata(ctx context.Context, ss []traceql.Spanset, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
+	pf, _, err := b.openForSearch(ctx, opts)
+	if err != nil {
+		return nil, err
+	}
+
+	return fetchMetadata(ctx, ss, pf, opts)
+}
+
+// jpe does this correclty merge the span attributes into the output metadata?
+func fetchMetadata(ctx context.Context, ss []traceql.Spanset, pf *parquet.File, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
+	rgs := pf.RowGroups() // jpe consolidate?
+	if opts.TotalPages > 0 {
+		// Read UP TO TotalPages.  The sharding calculations
+		// are just estimates, so it may not line up with the
+		// actual number of pages in this file.
+		if opts.StartPage+opts.TotalPages > len(rgs) {
+			opts.TotalPages = len(rgs) - opts.StartPage
+		}
+		rgs = rgs[opts.StartPage : opts.StartPage+opts.TotalPages]
+	}
+	makeIter := makeIterFunc(ctx, rgs, pf)
+
+	// collect rownumbers
+	rowNums := []parquetquery.RowNumber{} // jpe prealloc
+	for _, ss := range ss {
+		for _, span := range ss.Spans {
+			rowNums = append(rowNums, span.RowNum)
+		}
+	}
+
+	// span level iterator
+	iters := make([]parquetquery.Iterator, 0, 4)
+	iters = append(iters, &rowNumberIterator{rowNumbers: rowNums})
+	iters = append(iters, makeIter(columnPathSpanStartTime, nil, columnPathSpanStartTime))
+	iters = append(iters, makeIter(columnPathSpanEndTime, nil, columnPathSpanEndTime))
+	iters = append(iters, makeIter(columnPathSpanID, nil, columnPathSpanID))
+	spanIterator := parquetquery.NewJoinIterator(DefinitionLevelResourceSpans, iters, &spanCollector{})
+
+	// now wrap in a trace level iterator
+	traceIters := []parquetquery.Iterator{
+		spanIterator,
+		// Add static columns that are always return
+		makeIter(columnPathTraceID, nil, columnPathTraceID),
+		makeIter(columnPathStartTimeUnixNano, nil, columnPathStartTimeUnixNano),
+		makeIter(columnPathDurationNanos, nil, columnPathDurationNanos),
+		makeIter(columnPathRootSpanName, nil, columnPathRootSpanName),
+		makeIter(columnPathRootServiceName, nil, columnPathRootServiceName),
+	}
+	traceIter := parquetquery.NewJoinIterator(DefinitionLevelTrace, traceIters, &traceCollector{})
+
+	// jpe and return meta? should this function return an iterator as well?
+	meta := []traceql.SpansetMetadata{}
+	for {
+		res, err := traceIter.Next()
+		if res == nil {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+
+		spansetMetaData := res.OtherEntries[0].Value.(*traceql.SpansetMetadata) // jpe one of these assertions is going to break, use key instead of index
+		meta = append(meta, *spansetMetaData)
+	}
+
+	return meta, nil
+}
+
+// This turns groups of span values into Span objects
+type spanMetaCollector struct {
+	minAttributes   int                                     // jpe wut
+	durationFilters []*parquetquery.GenericPredicate[int64] // jpe wut
+}
+
+var _ parquetquery.GroupPredicate = (*spanMetaCollector)(nil)
+
+// jpe - how do we restrict to 3 spans per trace?
+func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
+
+	span := &traceql.SpanMetadata{
+		Attributes: make(map[traceql.Attribute]traceql.Static),
+	}
+
+	for _, e := range res.OtherEntries { // jpe - this is likely not going to be copied through correctly
+		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
+	}
+
+	// Merge all individual columns into the span
+	for _, kv := range res.Entries {
+		switch kv.Key {
+		case columnPathSpanID:
+			span.ID = kv.Value.ByteArray()
+		case columnPathSpanStartTime:
+			span.StartTimeUnixNanos = kv.Value.Uint64()
+		case columnPathSpanEndTime:
+			span.EndtimeUnixNanos = kv.Value.Uint64()
+		case columnPathSpanName:
+			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
+		//case columnPathSpanDuration:
+		//	span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(kv.Value.Uint64()))
+		case columnPathSpanStatusCode:
+			// Map OTLP status code back to TraceQL enum.
+			// For other values, use the raw integer.
+			var status traceql.Status
+			switch kv.Value.Uint64() {
+			case uint64(v1.Status_STATUS_CODE_UNSET):
+				status = traceql.StatusUnset
+			case uint64(v1.Status_STATUS_CODE_OK):
+				status = traceql.StatusOk
+			case uint64(v1.Status_STATUS_CODE_ERROR):
+				status = traceql.StatusError
+			default:
+				status = traceql.Status(kv.Value.Uint64())
+			}
+			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
+		default:
+			// TODO - This exists for span-level dedicated columns like http.status_code
+			// Are nils possible here?
+			switch kv.Value.Kind() {
+			case parquet.Boolean:
+				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticBool(kv.Value.Boolean())
+			case parquet.Int32, parquet.Int64:
+				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticInt(int(kv.Value.Int64()))
+			case parquet.Float:
+				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticFloat(kv.Value.Double())
+			case parquet.ByteArray:
+				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticString(kv.Value.String())
+			}
+		}
+	}
+
+	// Save computed duration if any filters present and at least one is passed.
+	if len(c.durationFilters) > 0 {
+		duration := span.EndtimeUnixNanos - span.StartTimeUnixNanos
+		for _, f := range c.durationFilters {
+			if f == nil || f.Fn(int64(duration)) {
+				span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(duration))
+				break
+			}
+		}
+	}
+
+	if c.minAttributes > 0 {
+		count := 0
+		for _, v := range span.Attributes {
+			if v.Type != traceql.TypeNil {
+				count++
+			}
+		}
+		if count < c.minAttributes {
+			return false
+		}
+	}
+
+	res.Entries = res.Entries[:0]
+	res.OtherEntries = res.OtherEntries[:0]
+	res.AppendOtherValue("span", span)
+
+	return true
+}
+
+// traceMetaCollector receives rows from the resource-level matches.
+// It adds trace-level attributes into the spansets before
+// they are returned
+type traceMetaCollector struct {
+}
+
+var _ parquetquery.GroupPredicate = (*traceMetaCollector)(nil)
+
+func (c *traceMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
+	finalSpanset := &traceql.SpansetMetadata{}
+
+	for _, e := range res.Entries {
+		switch e.Key {
+		case columnPathTraceID:
+			finalSpanset.TraceID = e.Value.ByteArray()
+		case columnPathStartTimeUnixNano:
+			finalSpanset.StartTimeUnixNanos = e.Value.Uint64()
+		case columnPathDurationNanos:
+			finalSpanset.DurationNanos = e.Value.Uint64()
+		case columnPathRootSpanName:
+			finalSpanset.RootSpanName = e.Value.String()
+		case columnPathRootServiceName:
+			finalSpanset.RootServiceName = e.Value.String()
+		}
+	}
+
+	for _, e := range res.OtherEntries {
+		if spanset, ok := e.Value.(*traceql.SpansetMetadata); ok {
+			finalSpanset.Spans = append(finalSpanset.Spans, spanset.Spans...)
+		}
+	}
+
+	res.Entries = res.Entries[:0]
+	res.OtherEntries = res.OtherEntries[:0]
+	res.AppendOtherValue("spanset", finalSpanset)
+
+	return true
+}
+
+// jpe - how to marry in attribute data?
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 2675509f5c3..7f0ac7a3fa4 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -19,6 +19,21 @@ import (
 	"github.com/grafana/tempo/tempodb/encoding/common"
 )
 
+func TestOne(t *testing.T) {
+	wantTr := fullyPopulatedTestTrace(nil)
+	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
+	ctx := context.Background()
+	req := traceql.MustExtractFetchSpansRequest(`{.foo =~ "xyz.*"}`)
+
+	resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
+	require.NoError(t, err, "search request:", req)
+
+	spanSet, err := resp.Results.Next(ctx)
+	require.NoError(t, err, "search request:", req)
+	require.NotNil(t, spanSet, "search request:", req)
+}
+
+// jpe everywhere that metadata was removed needs an equivalent test
 func TestBackendBlockSearchTraceQL(t *testing.T) {
 	wantTr := fullyPopulatedTestTrace(nil)
 	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
@@ -152,8 +167,6 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 		spanSet, err := resp.Results.Next(ctx)
 		require.NoError(t, err, "search request:", req)
 		require.NotNil(t, spanSet, "search request:", req)
-		require.Equal(t, wantTr.TraceID, spanSet.TraceID, "search request:", req)
-		require.Equal(t, []byte("spanid"), spanSet.Spans[0].ID, "search request:", req)
 	}
 
 	searchesThatDontMatch := []traceql.FetchSpansRequest{
@@ -254,12 +267,7 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 
 	makeSpanset := func(traceID []byte, rootSpanName, rootServiceName string, startTimeUnixNano, durationNanos uint64, spans ...traceql.Span) traceql.Spanset {
 		return traceql.Spanset{
-			TraceID:            traceID,
-			RootSpanName:       rootSpanName,
-			RootServiceName:    rootServiceName,
-			StartTimeUnixNanos: startTimeUnixNano,
-			DurationNanos:      durationNanos,
-			Spans:              spans,
+			Spans: spans,
 		}
 	}
 
@@ -282,9 +290,6 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							// foo not returned because the span didn't match it
 							traceql.NewScopedAttribute(traceql.AttributeScopeSpan, false, "bar"): traceql.NewStaticInt(123),
@@ -307,9 +312,6 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							// Foo matched on resource.
 							// TODO - This seems misleading since the span has foo=<something else>
@@ -335,9 +337,6 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500), // This is the only attribute that matched anything
 						},
@@ -362,9 +361,6 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							newResAttr("foo"):                traceql.NewStaticString("abc"), // Both are returned
 							newSpanAttr("foo"):               traceql.NewStaticString("def"), // Both are returned
@@ -394,16 +390,10 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes:         map[traceql.Attribute]traceql.Static{},
+						Attributes: map[traceql.Attribute]traceql.Static{},
 					},
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes:         map[traceql.Attribute]traceql.Static{},
+						Attributes: map[traceql.Attribute]traceql.Static{},
 					},
 				),
 			),
@@ -423,9 +413,6 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicName):   traceql.NewStaticString("world"),
 							traceql.NewIntrinsic(traceql.IntrinsicStatus): traceql.NewStaticStatus(traceql.StatusUnset),
@@ -445,17 +432,11 @@ func TestBackendBlockSearchTraceQLResults(t *testing.T) {
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(100 * time.Second),
 						},
 					},
 					traceql.Span{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
 						Attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(0 * time.Second),
 						},
diff --git a/tempodb/encoding/vparquet/wal_block.go b/tempodb/encoding/vparquet/wal_block.go
index 8f12acf8fe5..20a9e9b676b 100644
--- a/tempodb/encoding/vparquet/wal_block.go
+++ b/tempodb/encoding/vparquet/wal_block.go
@@ -592,6 +592,26 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, opt
 	}, nil
 }
 
+func (b *walBlock) FetchMetadata(ctx context.Context, ss []traceql.Spanset, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
+	pages := b.readFlushes()
+	allMeta := []traceql.SpansetMetadata{}
+	for _, page := range pages {
+		file, err := page.file()
+		if err != nil {
+			return nil, fmt.Errorf("error opening file %s: %w", page.path, err)
+		}
+
+		meta, err := fetchMetadata(ctx, ss, file, opts) // jpe this is wrong. it's using row numbers to match spans, but that won't work across the wal files
+		if err != nil {
+			return nil, errors.Wrap(err, "creating fetch iter")
+		}
+		allMeta = append(allMeta, meta...)
+	}
+
+	// combine iters?
+	return allMeta, nil
+}
+
 func (b *walBlock) walPath() string {
 	filename := fmt.Sprintf("%s+%s+%s", b.meta.BlockID, b.meta.TenantID, VersionString)
 	return filepath.Join(b.path, filename)

From 12b2cdc6b7a0047f36f94619510959b049ff41fe Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 18 Jan 2023 16:08:47 -0500
Subject: [PATCH 05/27] added a way to dump the iterator tree

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/parquetquery/iters.go                     | 49 +++++++++++++++++
 pkg/parquetquery/predicate_test.go            |  1 +
 pkg/parquetquery/predicates.go                | 54 +++++++++++++++++++
 pkg/traceql/storage.go                        |  2 +
 pkg/util/tab_out.go                           | 10 ++++
 tempodb/encoding/vparquet/block_search.go     |  8 +++
 tempodb/encoding/vparquet/block_traceql.go    | 34 ++++++++++--
 .../encoding/vparquet/block_traceql_meta.go   |  9 ++++
 .../encoding/vparquet/block_traceql_test.go   | 11 +++-
 9 files changed, 172 insertions(+), 6 deletions(-)
 create mode 100644 pkg/util/tab_out.go

diff --git a/pkg/parquetquery/iters.go b/pkg/parquetquery/iters.go
index 05117660249..6b6f4754520 100644
--- a/pkg/parquetquery/iters.go
+++ b/pkg/parquetquery/iters.go
@@ -9,6 +9,7 @@ import (
 	"sync"
 	"sync/atomic"
 
+	"github.com/grafana/tempo/pkg/util"
 	"github.com/opentracing/opentracing-go"
 	"github.com/pkg/errors"
 	pq "github.com/segmentio/parquet-go"
@@ -184,6 +185,8 @@ func (r *IteratorResult) Columns(buffer [][]pq.Value, names ...string) [][]pq.Va
 
 // iterator - Every iterator follows this interface and can be composed.
 type Iterator interface {
+	fmt.Stringer
+
 	// Next returns nil when done
 	Next() (*IteratorResult, error)
 
@@ -288,6 +291,10 @@ func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, colum
 	return c
 }
 
+func (c *ColumnIterator) String() string {
+	return fmt.Sprintf("ColumnIterator: %s \n\t%s", c.colName, util.TabOut(c.filter))
+}
+
 func (c *ColumnIterator) iterate(ctx context.Context, readSize int) {
 	defer close(c.ch)
 
@@ -577,6 +584,14 @@ func NewJoinIterator(definitionLevel int, iters []Iterator, pred GroupPredicate)
 	return &j
 }
 
+func (j *JoinIterator) String() string {
+	var iters string
+	for _, iter := range j.iters {
+		iters += "\n\t" + util.TabOut(iter)
+	}
+	return fmt.Sprintf("JoinIterator: %d\n\t%s\n%s)", j.definitionLevel, iters, j.pred)
+}
+
 func (j *JoinIterator) Next() (*IteratorResult, error) {
 	// Here is the algorithm for joins:  On each pass of the iterators
 	// we remember which ones are pointing at the earliest rows. If all
@@ -739,6 +754,18 @@ func NewLeftJoinIterator(definitionLevel int, required, optional []Iterator, pre
 	return &j
 }
 
+func (j *LeftJoinIterator) String() string {
+	srequired := "required: "
+	for _, r := range j.required {
+		srequired += "\n\t\t" + util.TabOut(r)
+	}
+	soptional := "optional: "
+	for _, o := range j.optional {
+		soptional += "\n\t\t" + util.TabOut(o)
+	}
+	return fmt.Sprintf("LeftJoinIterator: %d\n\t%s\n\t%s\n\t%s", j.definitionLevel, srequired, soptional, j.pred)
+}
+
 func (j *LeftJoinIterator) Next() (*IteratorResult, error) {
 
 	// Here is the algorithm for joins:  On each pass of the iterators
@@ -926,6 +953,14 @@ func NewUnionIterator(definitionLevel int, iters []Iterator, pred GroupPredicate
 	return &j
 }
 
+func (u *UnionIterator) String() string {
+	var iters string
+	for _, iter := range u.iters {
+		iters += iter.String() + ", "
+	}
+	return fmt.Sprintf("UnionIterator(%s)", iters)
+}
+
 func (u *UnionIterator) Next() (*IteratorResult, error) {
 	// Here is the algorithm for unions:  On each pass of the iterators
 	// we remember which ones are pointing at the earliest same row. The
@@ -1040,6 +1075,8 @@ func (u *UnionIterator) Close() {
 }
 
 type GroupPredicate interface {
+	fmt.Stringer
+
 	KeepGroup(*IteratorResult) bool
 }
 
@@ -1068,6 +1105,18 @@ func NewKeyValueGroupPredicate(keys, values []string) *KeyValueGroupPredicate {
 	return p
 }
 
+func (a *KeyValueGroupPredicate) String() string {
+	var skeys []string
+	var svals []string
+	for _, k := range a.keys {
+		skeys = append(skeys, string(k))
+	}
+	for _, v := range a.vals {
+		svals = append(svals, string(v))
+	}
+	return fmt.Sprintf("KeyValueGroupPredicate{%v, %v}", skeys, svals)
+}
+
 // KeepGroup checks if the given group contains all of the requested
 // key/value pairs.
 func (a *KeyValueGroupPredicate) KeepGroup(group *IteratorResult) bool {
diff --git a/pkg/parquetquery/predicate_test.go b/pkg/parquetquery/predicate_test.go
index 76ab2811e0d..c93e75e3765 100644
--- a/pkg/parquetquery/predicate_test.go
+++ b/pkg/parquetquery/predicate_test.go
@@ -27,6 +27,7 @@ func newAlwaysFalsePredicate() *mockPredicate {
 	return &mockPredicate{ret: false}
 }
 
+func (p *mockPredicate) String() string                           { return "mockPredicate{}" }
 func (p *mockPredicate) KeepValue(parquet.Value) bool             { p.valCalled = true; return p.ret }
 func (p *mockPredicate) KeepPage(parquet.Page) bool               { p.pageCalled = true; return p.ret }
 func (p *mockPredicate) KeepColumnChunk(parquet.ColumnChunk) bool { p.chunkCalled = true; return p.ret }
diff --git a/pkg/parquetquery/predicates.go b/pkg/parquetquery/predicates.go
index 6e9967e5b6c..8f3aef7c43d 100644
--- a/pkg/parquetquery/predicates.go
+++ b/pkg/parquetquery/predicates.go
@@ -2,6 +2,7 @@ package parquetquery
 
 import (
 	"bytes"
+	"fmt"
 	"regexp"
 	"strings"
 
@@ -11,6 +12,8 @@ import (
 // Predicate is a pushdown predicate that can be applied at
 // the chunk, page, and value levels.
 type Predicate interface {
+	fmt.Stringer
+
 	KeepColumnChunk(cc pq.ColumnChunk) bool
 	KeepPage(page pq.Page) bool
 	KeepValue(pq.Value) bool
@@ -36,6 +39,14 @@ func NewStringInPredicate(ss []string) Predicate {
 	return p
 }
 
+func (p *StringInPredicate) String() string {
+	var strings string
+	for _, s := range p.ss {
+		strings += fmt.Sprintf("%s, ", string(s))
+	}
+	return fmt.Sprintf("StringInPredicate{%s}", strings)
+}
+
 func (p *StringInPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
 	p.helper.setNewRowGroup()
 
@@ -97,6 +108,14 @@ func NewRegexInPredicate(regs []string) (*RegexInPredicate, error) {
 	return p, nil
 }
 
+func (p *RegexInPredicate) String() string {
+	var strings string
+	for _, s := range p.regs {
+		strings += fmt.Sprintf("%s, ", s.String())
+	}
+	return fmt.Sprintf("RegexInPredicate{%s}", strings)
+}
+
 func (p *RegexInPredicate) keep(v *pq.Value) bool {
 	if v.IsNull() {
 		// Null
@@ -154,6 +173,10 @@ func NewSubstringPredicate(substring string) *SubstringPredicate {
 	}
 }
 
+func (p *SubstringPredicate) String() string {
+	return fmt.Sprintf("SubstringPredicate{%s}", p.substring)
+}
+
 func (p *SubstringPredicate) KeepColumnChunk(cc pq.ColumnChunk) bool {
 	p.helper.setNewRowGroup()
 
@@ -192,6 +215,10 @@ func NewIntBetweenPredicate(min, max int64) *IntBetweenPredicate {
 	return &IntBetweenPredicate{min, max}
 }
 
+func (p *IntBetweenPredicate) String() string {
+	return fmt.Sprintf("IntBetweenPredicate{%d,%d}", p.min, p.max)
+}
+
 func (p *IntBetweenPredicate) KeepColumnChunk(c pq.ColumnChunk) bool {
 
 	if ci := c.ColumnIndex(); ci != nil {
@@ -239,6 +266,10 @@ func NewGenericPredicate[T any](fn func(T) bool, rangeFn func(T, T) bool, extrac
 	return &GenericPredicate[T]{Fn: fn, RangeFn: rangeFn, Extract: extract}
 }
 
+func (p *GenericPredicate[T]) String() string {
+	return fmt.Sprintf("GenericPredicate{}")
+}
+
 func (p *GenericPredicate[T]) KeepColumnChunk(c pq.ColumnChunk) bool {
 	p.helper.setNewRowGroup()
 
@@ -307,6 +338,10 @@ func NewFloatBetweenPredicate(min, max float64) *FloatBetweenPredicate {
 	return &FloatBetweenPredicate{min, max}
 }
 
+func (p *FloatBetweenPredicate) String() string {
+	return fmt.Sprintf("FloatBetweenPredicate{%f,%f}", p.min, p.max)
+}
+
 func (p *FloatBetweenPredicate) KeepColumnChunk(c pq.ColumnChunk) bool {
 
 	if ci := c.ColumnIndex(); ci != nil {
@@ -347,6 +382,14 @@ func NewOrPredicate(preds ...Predicate) *OrPredicate {
 	}
 }
 
+func (p *OrPredicate) String() string {
+	var preds string
+	for _, pred := range p.preds {
+		preds += pred.String() + ","
+	}
+	return fmt.Sprintf("OrPredicate{%s}", p.preds)
+}
+
 func (p *OrPredicate) KeepColumnChunk(c pq.ColumnChunk) bool {
 	ret := false
 	for _, p := range p.preds {
@@ -403,6 +446,13 @@ type InstrumentedPredicate struct {
 
 var _ Predicate = (*InstrumentedPredicate)(nil)
 
+func (p *InstrumentedPredicate) String() string {
+	if p.pred == nil {
+		return fmt.Sprintf("InstrumentedPredicate{%d, nil}", p.InspectedValues)
+	}
+	return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.pred)
+}
+
 func (p *InstrumentedPredicate) KeepColumnChunk(c pq.ColumnChunk) bool {
 	p.InspectedColumnChunks++
 
@@ -489,6 +539,10 @@ func NewSkipNilsPredicate() *SkipNilsPredicate {
 	return &SkipNilsPredicate{}
 }
 
+func (p *SkipNilsPredicate) String() string {
+	return "SkipNilsPredicate{}"
+}
+
 func (p *SkipNilsPredicate) KeepColumnChunk(c pq.ColumnChunk) bool {
 	return true
 }
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index f5ff1f6f110..da48228d5a5 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -16,6 +16,7 @@ type Condition struct {
 }
 
 type FetchSpansRequest struct {
+	Query              string
 	StartTimeUnixNanos uint64
 	EndTimeUnixNanos   uint64
 	Conditions         []Condition
@@ -93,6 +94,7 @@ func ExtractFetchSpansRequest(query string) (FetchSpansRequest, error) {
 	}
 
 	req := FetchSpansRequest{
+		Query:         query,
 		AllConditions: true,
 	}
 
diff --git a/pkg/util/tab_out.go b/pkg/util/tab_out.go
new file mode 100644
index 00000000000..05f836b88af
--- /dev/null
+++ b/pkg/util/tab_out.go
@@ -0,0 +1,10 @@
+package util
+
+import (
+	"fmt"
+	"strings"
+)
+
+func TabOut(s fmt.Stringer) string {
+	return strings.Replace(s.String(), "\n", "\n\t", -1)
+}
diff --git a/tempodb/encoding/vparquet/block_search.go b/tempodb/encoding/vparquet/block_search.go
index cedeab4c988..37ee9d2bd13 100644
--- a/tempodb/encoding/vparquet/block_search.go
+++ b/tempodb/encoding/vparquet/block_search.go
@@ -375,6 +375,10 @@ type rowNumberIterator struct {
 
 var _ pq.Iterator = (*rowNumberIterator)(nil)
 
+func (r *rowNumberIterator) String() string {
+	return "rowNumberIterator()"
+}
+
 func (r *rowNumberIterator) Next() (*pq.IteratorResult, error) {
 	if len(r.rowNumbers) == 0 {
 		return nil, nil
@@ -407,6 +411,10 @@ func newReportValuesPredicate(cb common.TagCallbackV2) *reportValuesPredicate {
 	return &reportValuesPredicate{cb: cb}
 }
 
+func (r *reportValuesPredicate) String() string {
+	return "reportValuesPredicate{}"
+}
+
 // KeepColumnChunk always returns true b/c we always have to dig deeper to find all values
 func (r *reportValuesPredicate) KeepColumnChunk(cc parquet.ColumnChunk) bool {
 	// Reinspect dictionary for each new column chunk
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 4a73b9d79f8..c0475201766 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -415,8 +415,8 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 			}
 			durationPredicates = append(durationPredicates, pred)
 			addPredicate(columnPathSpanStartTime, nil)
+			columnSelectAs[columnPathSpanStartTime] = columnPathSpanStartTime
 			addPredicate(columnPathSpanEndTime, nil)
-			columnSelectAs[columnPathSpanStartTime] = columnPathSpanStartTime // jpe what does this do?
 			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime
 			continue
 
@@ -455,7 +455,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	}
 
 	attrIter, err := createAttributeIterator(makeIter, genericConditions, DefinitionLevelResourceSpansILSSpanAttrs,
-		columnPathSpanAttrKey, columnPathSpanAttrString, columnPathSpanAttrInt, columnPathSpanAttrDouble, columnPathSpanAttrBool)
+		columnPathSpanAttrKey, columnPathSpanAttrString, columnPathSpanAttrInt, columnPathSpanAttrDouble, columnPathSpanAttrBool, allConditions)
 	if err != nil {
 		return nil, errors.Wrap(err, "creating span attribute iterator")
 	}
@@ -464,7 +464,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	}
 
 	for columnPath, predicates := range columnPredicates {
-		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath]))
+		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath])) // jpe why is this OR?
 	}
 
 	var required []parquetquery.Iterator
@@ -558,7 +558,7 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
 	}
 
 	attrIter, err := createAttributeIterator(makeIter, genericConditions, DefinitionLevelResourceAttrs,
-		columnPathResourceAttrKey, columnPathResourceAttrString, columnPathResourceAttrInt, columnPathResourceAttrDouble, columnPathResourceAttrBool)
+		columnPathResourceAttrKey, columnPathResourceAttrString, columnPathResourceAttrInt, columnPathResourceAttrDouble, columnPathResourceAttrBool, allConditions)
 	if err != nil {
 		return nil, errors.Wrap(err, "creating span attribute iterator")
 	}
@@ -836,6 +836,7 @@ func createBoolPredicate(op traceql.Operator, operands traceql.Operands) (parque
 func createAttributeIterator(makeIter makeIterFn, conditions []traceql.Condition,
 	definitionLevel int,
 	keyPath, strPath, intPath, floatPath, boolPath string,
+	allConditions bool,
 ) (parquetquery.Iterator, error) {
 	var (
 		attrKeys        = []string{}
@@ -907,6 +908,15 @@ func createAttributeIterator(makeIter makeIterFn, conditions []traceql.Condition
 	if len(valueIters) > 0 {
 		// LeftJoin means only look at rows where the key is what we want.
 		// Bring in any of the typed values as needed.
+
+		// if all conditions must be true we can use a simple join iterator to test the values one column at a time.
+		if allConditions {
+			iters := append([]parquetquery.Iterator{makeIter(keyPath, parquetquery.NewStringInPredicate(attrKeys), "key")}, valueIters...)
+			return parquetquery.NewJoinIterator(definitionLevel,
+				iters,
+				&attributeCollector{}), nil
+		}
+
 		return parquetquery.NewLeftJoinIterator(definitionLevel,
 			[]parquetquery.Iterator{makeIter(keyPath, parquetquery.NewStringInPredicate(attrKeys), "key")},
 			valueIters,
@@ -925,6 +935,10 @@ type spanCollector struct {
 var _ parquetquery.GroupPredicate = (*spanCollector)(nil)
 
 // jpe - how do we restrict to 3 spans per trace?
+func (c *spanCollector) String() string {
+	return fmt.Sprintf("spanCollector(%d, %v)", c.minAttributes, c.durationFilters)
+}
+
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	span := &traceql.Span{
@@ -1023,6 +1037,10 @@ type batchCollector struct {
 
 var _ parquetquery.GroupPredicate = (*batchCollector)(nil)
 
+func (c *batchCollector) String() string {
+	return fmt.Sprintf("batchCollector{%v, %d}", c.requireAtLeastOneMatchOverall, c.minAttributes)
+}
+
 func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	// TODO - This wraps everything up in a spanset per batch.
@@ -1118,6 +1136,10 @@ type traceCollector struct {
 
 var _ parquetquery.GroupPredicate = (*traceCollector)(nil)
 
+func (c *traceCollector) String() string {
+	return "traceCollector{}"
+}
+
 func (c *traceCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	finalSpanset := &traceql.Spanset{}
 
@@ -1142,6 +1164,10 @@ type attributeCollector struct {
 
 var _ parquetquery.GroupPredicate = (*attributeCollector)(nil)
 
+func (c *attributeCollector) String() string {
+	return "attributeCollector{}"
+}
+
 func (c *attributeCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	var key string
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 4fc9ed09e23..4f6c42d7b6c 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -2,6 +2,7 @@ package vparquet
 
 import (
 	"context"
+	"fmt"
 	"time"
 
 	"github.com/grafana/tempo/pkg/parquetquery"
@@ -89,6 +90,10 @@ type spanMetaCollector struct {
 
 var _ parquetquery.GroupPredicate = (*spanMetaCollector)(nil)
 
+func (c *spanMetaCollector) String() string {
+	return fmt.Sprintf("spanMetaCollector(%d, %v)", c.minAttributes, c.durationFilters)
+}
+
 // jpe - how do we restrict to 3 spans per trace?
 func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
@@ -182,6 +187,10 @@ type traceMetaCollector struct {
 
 var _ parquetquery.GroupPredicate = (*traceMetaCollector)(nil)
 
+func (c *traceMetaCollector) String() string {
+	return "traceMetaCollector{}"
+}
+
 func (c *traceMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	finalSpanset := &traceql.SpansetMetadata{}
 
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 7f0ac7a3fa4..34e14bdbc13 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -2,6 +2,7 @@ package vparquet
 
 import (
 	"context"
+	"fmt"
 	"path"
 	"testing"
 	"time"
@@ -28,12 +29,18 @@ func TestOne(t *testing.T) {
 	resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
 	require.NoError(t, err, "search request:", req)
 
+	fmt.Println("-----------")
+	fmt.Println(req.Query)
+	fmt.Println("-----------")
+	fmt.Println(resp.Results.(*spansetIterator).iter)
+
 	spanSet, err := resp.Results.Next(ctx)
 	require.NoError(t, err, "search request:", req)
-	require.NotNil(t, spanSet, "search request:", req)
+
+	fmt.Println("-----------")
+	fmt.Println(spanSet)
 }
 
-// jpe everywhere that metadata was removed needs an equivalent test
 func TestBackendBlockSearchTraceQL(t *testing.T) {
 	wantTr := fullyPopulatedTestTrace(nil)
 	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})

From 1bdd95a981afb50855f97cea17fc602ade8f7cc1 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 6 Feb 2023 13:48:17 -0500
Subject: [PATCH 06/27] fixin' bugs

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/parquetquery/iters.go                     |  1 +
 tempodb/encoding/vparquet/block_traceql.go    | 49 +++++++++++++++----
 .../encoding/vparquet/block_traceql_test.go   | 39 ++++++++++-----
 3 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/pkg/parquetquery/iters.go b/pkg/parquetquery/iters.go
index 6b6f4754520..1ecfb954cc2 100644
--- a/pkg/parquetquery/iters.go
+++ b/pkg/parquetquery/iters.go
@@ -742,6 +742,7 @@ type LeftJoinIterator struct {
 var _ Iterator = (*LeftJoinIterator)(nil)
 
 func NewLeftJoinIterator(definitionLevel int, required, optional []Iterator, pred GroupPredicate) *LeftJoinIterator {
+	// jpe - left join iterator loops infinitely if required is empty
 	j := LeftJoinIterator{
 		definitionLevel: definitionLevel,
 		required:        required,
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index c0475201766..9621eaef93f 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -3,6 +3,7 @@ package vparquet
 import (
 	"context"
 	"fmt"
+	"math"
 	"reflect"
 	"time"
 
@@ -22,6 +23,7 @@ type makeIterFn func(columnName string, predicate parquetquery.Predicate, select
 const (
 	columnPathTraceID                  = "TraceID"
 	columnPathStartTimeUnixNano        = "StartTimeUnixNano"
+	columnPathEndTimeUnixNano          = "EndTimeUnixNano"
 	columnPathDurationNanos            = "DurationNanos"
 	columnPathRootSpanName             = "RootSpanName"
 	columnPathRootServiceName          = "RootServiceName"
@@ -363,7 +365,7 @@ func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File,
 		allConditions = req.AllConditions && !mingledConditions
 	)
 
-	spanIter, err := createSpanIterator(makeIter, spanConditions, req.StartTimeUnixNanos, req.EndTimeUnixNanos, spanRequireAtLeastOneMatch, allConditions)
+	spanIter, err := createSpanIterator(makeIter, spanConditions, spanRequireAtLeastOneMatch, allConditions)
 	if err != nil {
 		return nil, errors.Wrap(err, "creating span iterator")
 	}
@@ -373,14 +375,14 @@ func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File,
 		return nil, errors.Wrap(err, "creating resource iterator")
 	}
 
-	traceIter := createTraceIterator(makeIter, resourceIter)
+	traceIter := createTraceIterator(makeIter, resourceIter, req.StartTimeUnixNanos, req.EndTimeUnixNanos)
 
 	return &spansetIterator{traceIter}, nil
 }
 
 // createSpanIterator iterates through all span-level columns, groups them into rows representing
 // one span each.  Spans are returned that match any of the given conditions.
-func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, start, end uint64, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) {
+func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) {
 
 	var (
 		columnSelectAs     = map[string]string{}
@@ -417,7 +419,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 			addPredicate(columnPathSpanStartTime, nil)
 			columnSelectAs[columnPathSpanStartTime] = columnPathSpanStartTime
 			addPredicate(columnPathSpanEndTime, nil)
-			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime
+			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime // jpe these all end up in an orpred but i don't think they should
 			continue
 
 		case traceql.IntrinsicStatus:
@@ -484,6 +486,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	spanCol := &spanCollector{
 		minCount,
 		durationPredicates,
+		false,
 	}
 
 	// This is an optimization for when all of the span conditions must be met.
@@ -506,12 +509,15 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, sta
 	// jpe if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
 	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
 	//  how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
-	if len(iters)+len(required) == 0 {
+	// does the iterator infinite loop if we don't do this?
+	//  the entire engine is built around spans. we have to return at least one entry for every span to the layers above for things to work
+	if len(required) == 0 {
 		required = []parquetquery.Iterator{makeIter(columnPathSpanKind, nil, columnPathSpanKind)}
+		// jpe - this almost doesn't matter and it might be more straightforward to just return this row and let the engine reject it?
+		// without this queries like { .cluster = "cluster" } fail b/c they rely on the span iterator to return something
+		spanCol.kindAsCount = true // signal to the collector we are only grabbing kind to count spans and we should not store it in attributes
 	}
 
-	// jpe - should all iterators be required?
-
 	// Left join here means the span id/start/end iterators + 1 are required,
 	// and all other conditions are optional. Whatever matches is returned.
 	return parquetquery.NewLeftJoinIterator(DefinitionLevelResourceSpansILSSpan, required, iters, spanCol), nil
@@ -613,9 +619,25 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
 }
 
 // jpe - modify to impose trace level time range condition
-func createTraceIterator(makeIter makeIterFn, resourceIter parquetquery.Iterator) parquetquery.Iterator {
-	traceIters := []parquetquery.Iterator{
-		resourceIter,
+func createTraceIterator(makeIter makeIterFn, resourceIter parquetquery.Iterator, start, end uint64) parquetquery.Iterator {
+	traceIters := make([]parquetquery.Iterator, 0, 3)
+
+	// order is interesting here. would it be more efficient to grab the span/resource conditions first
+	// or the time range filtering first?
+	traceIters = append(traceIters, resourceIter)
+
+	// evaluate time range
+	// Time range filtering?
+	if start > 0 && end > 0 {
+		// Here's how we detect the span overlaps the time window:
+		// Span start <= req.End
+		// Span end >= req.Start
+		var startFilter, endFilter parquetquery.Predicate
+		startFilter = parquetquery.NewIntBetweenPredicate(0, int64(end))
+		endFilter = parquetquery.NewIntBetweenPredicate(int64(start), math.MaxInt64)
+
+		traceIters = append(traceIters, makeIter(columnPathStartTimeUnixNano, startFilter, columnPathStartTimeUnixNano))
+		traceIters = append(traceIters, makeIter(columnPathEndTimeUnixNano, endFilter, columnPathEndTimeUnixNano))
 	}
 
 	// Final trace iterator
@@ -930,6 +952,7 @@ func createAttributeIterator(makeIter makeIterFn, conditions []traceql.Condition
 type spanCollector struct {
 	minAttributes   int
 	durationFilters []*parquetquery.GenericPredicate[int64]
+	kindAsCount     bool
 }
 
 var _ parquetquery.GroupPredicate = (*spanCollector)(nil)
@@ -981,6 +1004,12 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 				status = traceql.Status(kv.Value.Uint64())
 			}
 			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
+		case columnPathSpanKind:
+			// if we're acutally retrieving kind then keep it, otherwise it's just being used to count spans and we should not
+			//  include it in our attributes
+			if !c.kindAsCount {
+				span.Attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
+			}
 		default:
 			// TODO - This exists for span-level dedicated columns like http.status_code
 			// Are nils possible here?
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 34e14bdbc13..6d92ece9be0 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -24,19 +24,21 @@ func TestOne(t *testing.T) {
 	wantTr := fullyPopulatedTestTrace(nil)
 	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
 	ctx := context.Background()
-	req := traceql.MustExtractFetchSpansRequest(`{.foo =~ "xyz.*"}`)
+	req := traceql.MustExtractFetchSpansRequest(`{ true }`)
+
+	req.StartTimeUnixNanos = uint64(1000 * time.Second)
+	req.EndTimeUnixNanos = uint64(1001 * time.Second)
 
 	resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
 	require.NoError(t, err, "search request:", req)
 
+	spanSet, err := resp.Results.Next(ctx)
+	require.NoError(t, err, "search request:", req)
+
 	fmt.Println("-----------")
 	fmt.Println(req.Query)
 	fmt.Println("-----------")
 	fmt.Println(resp.Results.(*spansetIterator).iter)
-
-	spanSet, err := resp.Results.Next(ctx)
-	require.NoError(t, err, "search request:", req)
-
 	fmt.Println("-----------")
 	fmt.Println(spanSet)
 }
@@ -49,9 +51,19 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 	searchesThatMatch := []traceql.FetchSpansRequest{
 		{}, // Empty request
 		{
-			// Time range
-			StartTimeUnixNanos: uint64(101 * time.Second),
-			EndTimeUnixNanos:   uint64(102 * time.Second),
+			// Time range inside trace
+			StartTimeUnixNanos: uint64(1100 * time.Second),
+			EndTimeUnixNanos:   uint64(1200 * time.Second),
+		},
+		{
+			// Time range overlap start
+			StartTimeUnixNanos: uint64(900 * time.Second),
+			EndTimeUnixNanos:   uint64(1100 * time.Second),
+		},
+		{
+			// Time range overlap end
+			StartTimeUnixNanos: uint64(1900 * time.Second),
+			EndTimeUnixNanos:   uint64(2100 * time.Second),
 		},
 		// Intrinsics
 		traceql.MustExtractFetchSpansRequest(`{` + LabelName + ` = "hello"}`),
@@ -189,9 +201,14 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 		traceql.MustExtractFetchSpansRequest(`{.` + LabelHTTPStatusCode + ` > 600}`),                  // Well-known attribute: http.status_code not match
 		traceql.MustExtractFetchSpansRequest(`{.foo = "xyz" || .` + LabelHTTPStatusCode + " = 1000}"), // Matches neither condition
 		{
-			// Outside time range
-			StartTimeUnixNanos: uint64(300 * time.Second),
-			EndTimeUnixNanos:   uint64(400 * time.Second),
+			// Time range after trace
+			StartTimeUnixNanos: uint64(3000 * time.Second),
+			EndTimeUnixNanos:   uint64(4000 * time.Second),
+		},
+		{
+			// Time range before trace
+			StartTimeUnixNanos: uint64(600 * time.Second),
+			EndTimeUnixNanos:   uint64(700 * time.Second),
 		},
 		{
 			// Matches some conditions but not all

From b72164833bb351688448598b5be252aaf21dcb21 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 7 Feb 2023 12:28:22 -0500
Subject: [PATCH 07/27] add meta support

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/parquetquery/iters.go                     |  10 +-
 pkg/traceql/engine.go                         |  69 +++--
 pkg/traceql/engine_test.go                    |  61 +++--
 pkg/traceql/storage.go                        |  34 ++-
 tempodb/encoding/common/interfaces.go         |   1 -
 tempodb/encoding/v2/backend_block.go          |   4 -
 tempodb/encoding/v2/wal_block.go              |   5 -
 tempodb/encoding/vparquet/block_search.go     |  27 +-
 tempodb/encoding/vparquet/block_traceql.go    | 148 ++++++----
 .../encoding/vparquet/block_traceql_meta.go   | 239 ++++++++--------
 .../vparquet/block_traceql_meta_test.go       | 254 ++++++++++++++++++
 .../encoding/vparquet/block_traceql_test.go   | 221 +--------------
 tempodb/encoding/vparquet/wal_block.go        |  22 +-
 tempodb/wal/wal_test.go                       |  13 +-
 14 files changed, 592 insertions(+), 516 deletions(-)
 create mode 100644 tempodb/encoding/vparquet/block_traceql_meta_test.go

diff --git a/pkg/parquetquery/iters.go b/pkg/parquetquery/iters.go
index 1ecfb954cc2..3a4aa1dfd29 100644
--- a/pkg/parquetquery/iters.go
+++ b/pkg/parquetquery/iters.go
@@ -149,6 +149,15 @@ func (r *IteratorResult) AppendOtherValue(k string, v interface{}) {
 	}{k, v})
 }
 
+func (r *IteratorResult) OtherValueFromKey(k string) interface{} {
+	for _, e := range r.OtherEntries {
+		if e.Key == k {
+			return e.Value
+		}
+	}
+	return nil
+}
+
 // ToMap converts the unstructured list of data into a map containing an entry
 // for each column, and the lists of values.  The order of columns is
 // not preseved, but the order of values within each column is.
@@ -742,7 +751,6 @@ type LeftJoinIterator struct {
 var _ Iterator = (*LeftJoinIterator)(nil)
 
 func NewLeftJoinIterator(definitionLevel int, required, optional []Iterator, pred GroupPredicate) *LeftJoinIterator {
-	// jpe - left join iterator loops infinitely if required is empty
 	j := LeftJoinIterator{
 		definitionLevel: definitionLevel,
 		required:        required,
diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index 16de463903f..a4a2991a555 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -36,64 +36,59 @@ func (e *Engine) Execute(ctx context.Context, searchReq *tempopb.SearchRequest,
 	span.SetTag("pipeline", rootExpr.Pipeline)
 	span.SetTag("fetchSpansRequest", fetchSpansRequest)
 
-	fetchSpansResponse, err := spanSetFetcher.Fetch(ctx, fetchSpansRequest)
-	if err != nil {
-		return nil, err
-	}
-	iterator := fetchSpansResponse.Results
-
 	spansetsEvaluated := 0
-
-	results := []Spanset{}
-	for {
-		spanSet, err := iterator.Next(ctx)
-		if err != nil {
-			span.LogKV("msg", "iterator.Next", "err", err)
-			return nil, err
-		}
-
-		if spanSet == nil {
-			break
+	// set up the expression evaluation as a filter to reduce data pulled
+	fetchSpansRequest.Filter = func(inSS Spanset) ([]Spanset, error) {
+		if len(inSS.Spans) == 0 {
+			return nil, nil
 		}
 
-		ss, err := rootExpr.Pipeline.evaluate([]Spanset{*spanSet})
+		evalSS, err := rootExpr.Pipeline.evaluate([]Spanset{inSS})
 		if err != nil {
 			span.LogKV("msg", "pipeline.evaluate", "err", err)
 			return nil, err
 		}
-		spansetsEvaluated++
 
-		if len(ss) == 0 {
-			continue
+		spansetsEvaluated++
+		if len(evalSS) == 0 {
+			return nil, nil
 		}
 
-		results = append(results, ss...)
-
-		if len(results) == int(searchReq.Limit) {
-			break
+		// reduce all evalSS to their max length to reduce meta data lookups
+		for i := range evalSS {
+			if len(evalSS[i].Spans) > e.spansPerSpanSet {
+				evalSS[i].Spans = evalSS[i].Spans[:e.spansPerSpanSet]
+			}
 		}
-	}
 
-	if len(results) == 0 {
-		return &tempopb.SearchResponse{
-			Traces: nil,
-			// TODO capture and update metrics
-			Metrics: &tempopb.SearchMetrics{},
-		}, nil
+		return evalSS, nil
 	}
 
-	ss, err := spanSetFetcher.FetchMetadata(ctx, results)
+	fetchSpansResponse, err := spanSetFetcher.Fetch(ctx, fetchSpansRequest)
 	if err != nil {
 		return nil, err
 	}
+	iterator := fetchSpansResponse.Results
 
 	res := &tempopb.SearchResponse{
 		Traces: nil,
 		// TODO capture and update metrics
 		Metrics: &tempopb.SearchMetrics{},
 	}
-	for _, spanSet := range ss {
-		res.Traces = append(res.Traces, e.asTraceSearchMetadata(spanSet))
+	for {
+		spanset, err := iterator.Next(ctx)
+		if spanset == nil {
+			break
+		}
+		if err != nil {
+			span.LogKV("msg", "iterator.Next", "err", err)
+			return nil, err
+		}
+		res.Traces = append(res.Traces, e.asTraceSearchMetadata(*spanset)) // jpe pointer <> val conversion
+
+		if len(res.Traces) >= int(searchReq.Limit) && searchReq.Limit > 0 {
+			break
+		}
 	}
 
 	span.SetTag("spansets_evaluated", spansetsEvaluated)
@@ -166,10 +161,6 @@ func (e *Engine) asTraceSearchMetadata(spanset SpansetMetadata) *tempopb.TraceSe
 		}
 
 		metadata.SpanSet.Spans = append(metadata.SpanSet.Spans, tempopbSpan)
-
-		if e.spansPerSpanSet != 0 && len(metadata.SpanSet.Spans) == e.spansPerSpanSet {
-			break
-		}
 	}
 
 	return metadata
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index dedb4648160..a27a162db72 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -19,8 +19,6 @@ import (
 	"github.com/grafana/tempo/pkg/util"
 )
 
-// jpe - everywhere you removed ID replace with span rownumbers
-
 func TestEngine_Execute(t *testing.T) {
 	now := time.Now()
 	e := Engine{}
@@ -30,15 +28,22 @@ func TestEngine_Execute(t *testing.T) {
 	}
 	spanSetFetcher := MockSpanSetFetcher{
 		iterator: &MockSpanSetIterator{
-			results: []*Spanset{
+			results: []*SpansetMetadata{
 				{
-					Spans: []Span{
+					TraceID:         []byte{1},
+					RootSpanName:    "HTTP GET",
+					RootServiceName: "my-service",
+					Spans: []SpanMetadata{
 						{
+							ID: []byte{1},
 							Attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 							},
 						},
 						{
+							ID:                 []byte{2},
+							StartTimeUnixNanos: uint64(now.UnixNano()),
+							EndtimeUnixNanos:   uint64(now.Add(100 * time.Millisecond).UnixNano()),
 							Attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 								NewAttribute("bar"): NewStaticString("value"),
@@ -47,8 +52,12 @@ func TestEngine_Execute(t *testing.T) {
 					},
 				},
 				{
-					Spans: []Span{
+					TraceID:         []byte{2},
+					RootSpanName:    "HTTP POST",
+					RootServiceName: "my-service",
+					Spans: []SpanMetadata{
 						{
+							ID: []byte{3},
 							Attributes: map[Attribute]Static{
 								NewAttribute("bar"): NewStaticString("value"),
 							},
@@ -69,6 +78,7 @@ func TestEngine_Execute(t *testing.T) {
 		},
 		AllConditions: true,
 	}
+	spanSetFetcher.capturedRequest.Filter = nil // have to set this to nil b/c assert.Equal does not handle function pointers
 	assert.Equal(t, expectedFetchSpansRequest, spanSetFetcher.capturedRequest)
 
 	expectedTraceSearchMetadata := []*tempopb.TraceSearchMetadata{
@@ -252,26 +262,47 @@ var _ = (SpansetFetcher)(&MockSpanSetFetcher{})
 
 func (m *MockSpanSetFetcher) Fetch(ctx context.Context, request FetchSpansRequest) (FetchSpansResponse, error) {
 	m.capturedRequest = request
+	m.iterator.(*MockSpanSetIterator).filter = request.Filter
 	return FetchSpansResponse{
 		Results: m.iterator,
 	}, nil
 }
 
-func (m *MockSpanSetFetcher) FetchMetadata(context.Context, []Spanset) ([]SpansetMetadata, error) {
-	return nil, fmt.Errorf("not implemented") // jpe test?
+type MockSpanSetIterator struct {
+	results []*SpansetMetadata
+	filter  FilterSpans
 }
 
-type MockSpanSetIterator struct {
-	results []*Spanset
+func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error) {
+	for {
+		if len(m.results) == 0 {
+			return nil, nil
+		}
+		r := m.results[0]
+		m.results = m.results[1:]
+
+		ss, err := m.filter(spansetFromMetaData(r))
+		if err != nil {
+			return nil, err
+		}
+		if len(ss) == 0 {
+			return nil, nil
+		}
+
+		r.Spans = r.Spans[len(ss):] // jpe this is horrible - really should match spans passed by m.filter and only pass that metadata
+		return r, nil
+	}
 }
 
-func (m *MockSpanSetIterator) Next(ctx context.Context) (*Spanset, error) {
-	if len(m.results) == 0 {
-		return nil, nil
+func spansetFromMetaData(meta *SpansetMetadata) Spanset { // jpe weird pointer conversion
+	ret := Spanset{}
+	for _, s := range meta.Spans {
+		s := Span{
+			Attributes: s.Attributes,
+		}
+		ret.Spans = append(ret.Spans, s)
 	}
-	r := m.results[0]
-	m.results = m.results[1:]
-	return r, nil
+	return ret
 }
 
 func newCondition(attr Attribute, op Operator, operands ...Static) Condition {
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index da48228d5a5..a742fd81aeb 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -2,7 +2,6 @@ package traceql
 
 import (
 	"context"
-	"fmt"
 
 	"github.com/grafana/tempo/pkg/parquetquery"
 )
@@ -15,6 +14,13 @@ type Condition struct {
 	Operands  Operands
 }
 
+// FilterSpans is a hint that allows the calling code to filter down spans to only
+// those that metadata needs to be retrieved for. If the returned Spanset has no
+// spans it is discarded and will not appear in FetchSpansResponse. The bool
+// return value is used to indicate if the Fetcher should continue iterating or if
+// it can bail out.
+type FilterSpans func(Spanset) ([]Spanset, error)
+
 type FetchSpansRequest struct {
 	Query              string
 	StartTimeUnixNanos uint64
@@ -28,16 +34,24 @@ type FetchSpansRequest struct {
 	// can make extra optimizations by returning only spansets that meet
 	// all criteria.
 	AllConditions bool
+
+	// For some implementations retrieving all of the metadata for the spans
+	// can be quite costly. This hint allows the calling code to filter down
+	// spans before the span metadata is fetched, but after the data requested
+	// in the Conditions is fetched. If this is unset then all metadata
+	// for all matching spansets is returned.
+	Filter FilterSpans
 }
 
 func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 	f.Conditions = append(f.Conditions, c...)
 }
 
-type Span struct {
-	// jpe need rownumber to go fetch metadata later?
-	RowNum     parquetquery.RowNumber
-	Attributes map[Attribute]Static
+type Span struct { // jpe make this an interface?
+	RowNum             parquetquery.RowNumber
+	StartTimeUnixNanos uint64
+	EndtimeUnixNanos   uint64
+	Attributes         map[Attribute]Static
 }
 
 type Spanset struct {
@@ -49,7 +63,7 @@ type SpanMetadata struct {
 	ID                 []byte
 	StartTimeUnixNanos uint64
 	EndtimeUnixNanos   uint64
-	Attributes         map[Attribute]Static // jpe copy from Span in the fetch part?
+	Attributes         map[Attribute]Static
 }
 
 type SpansetMetadata struct {
@@ -62,7 +76,7 @@ type SpansetMetadata struct {
 }
 
 type SpansetIterator interface {
-	Next(context.Context) (*Spanset, error)
+	Next(context.Context) (*SpansetMetadata, error)
 }
 
 type FetchSpansResponse struct {
@@ -72,7 +86,6 @@ type FetchSpansResponse struct {
 
 type SpansetFetcher interface {
 	Fetch(context.Context, FetchSpansRequest) (FetchSpansResponse, error)
-	FetchMetadata(context.Context, []Spanset) ([]SpansetMetadata, error) // jpe review this method signature
 }
 
 // MustExtractFetchSpansRequest parses the given traceql query and returns
@@ -115,8 +128,3 @@ func NewSpansetFetcherWrapper(f func(ctx context.Context, req FetchSpansRequest)
 func (s SpansetFetcherWrapper) Fetch(ctx context.Context, request FetchSpansRequest) (FetchSpansResponse, error) {
 	return s.f(ctx, request)
 }
-
-func (s SpansetFetcherWrapper) FetchMetadata(ctx context.Context, spansets []Spanset) ([]SpansetMetadata, error) {
-	// jpe implement
-	return nil, fmt.Errorf("not implemented")
-}
diff --git a/tempodb/encoding/common/interfaces.go b/tempodb/encoding/common/interfaces.go
index 7f1f8089403..f1b451d9f07 100644
--- a/tempodb/encoding/common/interfaces.go
+++ b/tempodb/encoding/common/interfaces.go
@@ -27,7 +27,6 @@ type Searcher interface {
 	SearchTagValuesV2(ctx context.Context, tag traceql.Attribute, cb TagCallbackV2, opts SearchOptions) error
 
 	Fetch(context.Context, traceql.FetchSpansRequest, SearchOptions) (traceql.FetchSpansResponse, error)
-	FetchMetadata(context.Context, []traceql.Spanset, SearchOptions) ([]traceql.SpansetMetadata, error) // jpe review this method signature - i assume this is necessary?
 }
 
 type CacheControl struct {
diff --git a/tempodb/encoding/v2/backend_block.go b/tempodb/encoding/v2/backend_block.go
index 3b9aa889f8c..3bce0421879 100644
--- a/tempodb/encoding/v2/backend_block.go
+++ b/tempodb/encoding/v2/backend_block.go
@@ -250,7 +250,3 @@ func search(decoder model.ObjectDecoder, maxBytes int, id common.ID, obj []byte,
 func (b *BackendBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.SearchOptions) (traceql.FetchSpansResponse, error) {
 	return traceql.FetchSpansResponse{}, common.ErrUnsupported
 }
-
-func (b *BackendBlock) FetchMetadata(context.Context, []traceql.Spanset, common.SearchOptions) ([]traceql.SpansetMetadata, error) {
-	return nil, common.ErrUnsupported
-}
diff --git a/tempodb/encoding/v2/wal_block.go b/tempodb/encoding/v2/wal_block.go
index a26402b0cef..eb6f0cecabb 100644
--- a/tempodb/encoding/v2/wal_block.go
+++ b/tempodb/encoding/v2/wal_block.go
@@ -285,11 +285,6 @@ func (a *walBlock) Fetch(context.Context, traceql.FetchSpansRequest, common.Sear
 	return traceql.FetchSpansResponse{}, common.ErrUnsupported
 }
 
-// FetchMetadata implements traceql.SpansetFetcher
-func (b *walBlock) FetchMetadata(context.Context, []traceql.Spanset, common.SearchOptions) ([]traceql.SpansetMetadata, error) {
-	return nil, common.ErrUnsupported
-}
-
 func (a *walBlock) fullFilename() string {
 	filename := a.fullFilenameSeparator("+")
 	_, e1 := os.Stat(filename)
diff --git a/tempodb/encoding/vparquet/block_search.go b/tempodb/encoding/vparquet/block_search.go
index 37ee9d2bd13..1fcd89b75b0 100644
--- a/tempodb/encoding/vparquet/block_search.go
+++ b/tempodb/encoding/vparquet/block_search.go
@@ -112,17 +112,7 @@ func (b *backendBlock) Search(ctx context.Context, req *tempopb.SearchRequest, o
 	// Get list of row groups to inspect. Ideally we use predicate pushdown
 	// here to keep only row groups that can potentially satisfy the request
 	// conditions, but don't have it figured out yet.
-	rgs := pf.RowGroups()
-	if opts.TotalPages > 0 {
-		// Read UP TO TotalPages.  The sharding calculations
-		// are just estimates, so it may not line up with the
-		// actual number of pages in this file.
-		if opts.StartPage+opts.TotalPages > len(rgs) {
-			opts.TotalPages = len(rgs) - opts.StartPage
-		}
-		rgs = rgs[opts.StartPage : opts.StartPage+opts.TotalPages]
-	}
-
+	rgs := rowGroupsFromFile(pf, opts)
 	results, err := searchParquetFile(derivedCtx, pf, req, rgs)
 	if err != nil {
 		return nil, err
@@ -475,3 +465,18 @@ func callback(cb common.TagCallbackV2, v parquet.Value) (stop bool) {
 		return false
 	}
 }
+
+func rowGroupsFromFile(pf *parquet.File, opts common.SearchOptions) []parquet.RowGroup {
+	rgs := pf.RowGroups()
+	if opts.TotalPages > 0 {
+		// Read UP TO TotalPages.  The sharding calculations
+		// are just estimates, so it may not line up with the
+		// actual number of pages in this file.
+		if opts.StartPage+opts.TotalPages > len(rgs) {
+			opts.TotalPages = len(rgs) - opts.StartPage
+		}
+		rgs = rgs[opts.StartPage : opts.StartPage+opts.TotalPages]
+	}
+
+	return rgs
+}
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 9621eaef93f..e2c0759f03b 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -57,6 +57,9 @@ const (
 	columnPathSpanHTTPStatusCode = "rs.ils.Spans.HttpStatusCode"
 	columnPathSpanHTTPMethod     = "rs.ils.Spans.HttpMethod"
 	columnPathSpanHTTPURL        = "rs.ils.Spans.HttpUrl"
+
+	otherEntrySpansetKey = "spanset"
+	otherEntrySpanKey    = "span"
 )
 
 var intrinsicColumnLookups = map[traceql.Intrinsic]struct {
@@ -166,37 +169,85 @@ func operandType(operands traceql.Operands) traceql.StaticType {
 // spansetIterator turns the parquet iterator into the final
 // traceql iterator.  Every row it receives is one spanset.
 type spansetIterator struct {
-	iter parquetquery.Iterator
-}
+	iter   parquetquery.Iterator
+	filter traceql.FilterSpans
 
-var _ traceql.SpansetIterator = (*spansetIterator)(nil)
+	currentSpans []*traceql.Span
+}
 
-func (i *spansetIterator) Next(ctx context.Context) (*traceql.Spanset, error) {
+//var _ pq.Iterator = (*spansetIterator)(nil) jpe do i want to do an pq.Iterator? or just leave the types
 
-	res, err := i.iter.Next()
-	if err != nil {
-		return nil, err
+func newSpansetIterator(iter parquetquery.Iterator, filter traceql.FilterSpans) *spansetIterator {
+	return &spansetIterator{
+		iter:   iter,
+		filter: filter,
 	}
-	if res == nil {
-		return nil, nil
+}
+
+func (i *spansetIterator) Next() (*traceql.Span, error) {
+	// drain current buffer
+	if len(i.currentSpans) > 0 {
+		ret := i.currentSpans[0]
+		i.currentSpans = i.currentSpans[1:]
+		return ret, nil
 	}
 
-	// The spanset is in the OtherEntries
-	spanset := res.OtherEntries[0].Value.(*traceql.Spanset)
+	for {
+		res, err := i.iter.Next()
+		if err != nil {
+			return nil, err
+		}
+		if res == nil {
+			return nil, nil
+		}
 
-	return spanset, nil
+		// The spanset is in the OtherEntries
+		iface := res.OtherValueFromKey(otherEntrySpansetKey)
+		if iface == nil {
+			return nil, fmt.Errorf("engine assumption broken: spanset not found in other entries")
+		}
+		spanset, ok := iface.(*traceql.Spanset)
+		if !ok {
+			return nil, fmt.Errorf("engine assumption broken: spanset is not of type *traceql.Spanset")
+		}
+
+		var filteredSpansets []traceql.Spanset
+		if i.filter != nil {
+			filteredSpansets, err = i.filter(*spanset) // jpe pointer/val juggling
+			if err != nil {
+				return nil, err
+			}
+		} else {
+			filteredSpansets = []traceql.Spanset{*spanset} // jpe pointer/val juggling
+		}
+
+		// flatten spans into i.currentSpans
+		for _, ss := range filteredSpansets {
+			for _, s := range ss.Spans {
+				span := s
+				i.currentSpans = append(i.currentSpans, &span) // jpe pointer/val juggling
+			}
+		}
+
+		// found something!
+		if len(i.currentSpans) > 0 {
+			ret := i.currentSpans[0]
+			i.currentSpans = i.currentSpans[1:]
+			return ret, nil
+		}
+	}
 }
 
 // mergeSpansetIterator iterates through a slice of spansetIterators exhausting them
 // in order
 type mergeSpansetIterator struct {
-	iters []*spansetIterator
+	iters []traceql.SpansetIterator
 	cur   int
 }
 
 var _ traceql.SpansetIterator = (*mergeSpansetIterator)(nil)
 
-func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.Spanset, error) {
+func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.SpansetMetadata, error) {
 	if i.cur >= len(i.iters) {
 		return nil, nil
 	}
@@ -286,7 +337,7 @@ func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.Spanset, erro
 //                                                            |
 //                                                            V
 
-func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File, opts common.SearchOptions) (*spansetIterator, error) {
+func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File, opts common.SearchOptions) (*spansetMetadataIterator, error) {
 
 	// Categorize conditions into span-level or resource-level
 	var (
@@ -325,16 +376,7 @@ func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File,
 		}
 	}
 
-	rgs := pf.RowGroups()
-	if opts.TotalPages > 0 {
-		// Read UP TO TotalPages.  The sharding calculations
-		// are just estimates, so it may not line up with the
-		// actual number of pages in this file.
-		if opts.StartPage+opts.TotalPages > len(rgs) {
-			opts.TotalPages = len(rgs) - opts.StartPage
-		}
-		rgs = rgs[opts.StartPage : opts.StartPage+opts.TotalPages]
-	}
+	rgs := rowGroupsFromFile(pf, opts)
 	makeIter := makeIterFunc(ctx, rgs, pf)
 
 	// Global state
@@ -365,7 +407,7 @@ func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File,
 		allConditions = req.AllConditions && !mingledConditions
 	)
 
-	spanIter, err := createSpanIterator(makeIter, spanConditions, spanRequireAtLeastOneMatch, allConditions)
+	spanIter, spanStartEndRetrieved, err := createSpanIterator(makeIter, spanConditions, spanRequireAtLeastOneMatch, allConditions)
 	if err != nil {
 		return nil, errors.Wrap(err, "creating span iterator")
 	}
@@ -377,19 +419,22 @@ func fetch(ctx context.Context, req traceql.FetchSpansRequest, pf *parquet.File,
 
 	traceIter := createTraceIterator(makeIter, resourceIter, req.StartTimeUnixNanos, req.EndTimeUnixNanos)
 
-	return &spansetIterator{traceIter}, nil
+	spansetIter := newSpansetIterator(traceIter, req.Filter)
+
+	return createSpansetMetaIterator(makeIter, spansetIter, spanStartEndRetrieved)
 }
 
 // createSpanIterator iterates through all span-level columns, groups them into rows representing
 // one span each.  Spans are returned that match any of the given conditions.
-func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, error) {
+func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, requireAtLeastOneMatch, allConditions bool) (parquetquery.Iterator, bool, error) {
 
 	var (
-		columnSelectAs     = map[string]string{}
-		columnPredicates   = map[string][]parquetquery.Predicate{}
-		iters              []parquetquery.Iterator
-		genericConditions  []traceql.Condition
-		durationPredicates []*parquetquery.GenericPredicate[int64]
+		columnSelectAs        = map[string]string{}
+		columnPredicates      = map[string][]parquetquery.Predicate{}
+		iters                 []parquetquery.Iterator
+		genericConditions     []traceql.Condition
+		durationPredicates    []*parquetquery.GenericPredicate[int64]
+		spanStartEndRetreived bool
 	)
 
 	addPredicate := func(columnPath string, p parquetquery.Predicate) {
@@ -404,28 +449,29 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 		case traceql.IntrinsicName:
 			pred, err := createStringPredicate(cond.Op, cond.Operands)
 			if err != nil {
-				return nil, err
+				return nil, false, err
 			}
 			addPredicate(columnPathSpanName, pred)
 			columnSelectAs[columnPathSpanName] = columnPathSpanName
 			continue
 
 		case traceql.IntrinsicDuration:
+			spanStartEndRetreived = true
 			pred, err := createIntPredicate(cond.Op, cond.Operands)
 			if err != nil {
-				return nil, err
+				return nil, false, err
 			}
 			durationPredicates = append(durationPredicates, pred)
 			addPredicate(columnPathSpanStartTime, nil)
 			columnSelectAs[columnPathSpanStartTime] = columnPathSpanStartTime
 			addPredicate(columnPathSpanEndTime, nil)
-			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime // jpe these all end up in an orpred but i don't think they should
+			columnSelectAs[columnPathSpanEndTime] = columnPathSpanEndTime
 			continue
 
 		case traceql.IntrinsicStatus:
 			pred, err := createStatusPredicate(cond.Op, cond.Operands)
 			if err != nil {
-				return nil, err
+				return nil, false, err
 			}
 			addPredicate(columnPathSpanStatusCode, pred)
 			columnSelectAs[columnPathSpanStatusCode] = columnPathSpanStatusCode
@@ -444,7 +490,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 			if entry.typ == operandType(cond.Operands) {
 				pred, err := createPredicate(cond.Op, cond.Operands)
 				if err != nil {
-					return nil, errors.Wrap(err, "creating predicate")
+					return nil, false, errors.Wrap(err, "creating predicate")
 				}
 				addPredicate(entry.columnPath, pred)
 				columnSelectAs[entry.columnPath] = cond.Attribute.Name
@@ -459,14 +505,14 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 	attrIter, err := createAttributeIterator(makeIter, genericConditions, DefinitionLevelResourceSpansILSSpanAttrs,
 		columnPathSpanAttrKey, columnPathSpanAttrString, columnPathSpanAttrInt, columnPathSpanAttrDouble, columnPathSpanAttrBool, allConditions)
 	if err != nil {
-		return nil, errors.Wrap(err, "creating span attribute iterator")
+		return nil, false, errors.Wrap(err, "creating span attribute iterator")
 	}
 	if attrIter != nil {
 		iters = append(iters, attrIter)
 	}
 
 	for columnPath, predicates := range columnPredicates {
-		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath])) // jpe why is this OR?
+		iters = append(iters, makeIter(columnPath, parquetquery.NewOrPredicate(predicates...), columnSelectAs[columnPath]))
 	}
 
 	var required []parquetquery.Iterator
@@ -506,21 +552,19 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 		iters = nil
 	}
 
-	// jpe if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
+	// if there are no direct conditions imposed on the span/span attributes level we are purposefully going to request the "Kind" column
 	//  b/c it is extremely cheap to retrieve. retrieving matching spans in this case will allow aggregates such as "count" to be computed
 	//  how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
-	// does the iterator infinite loop if we don't do this?
 	//  the entire engine is built around spans. we have to return at least one entry for every span to the layers above for things to work
 	if len(required) == 0 {
 		required = []parquetquery.Iterator{makeIter(columnPathSpanKind, nil, columnPathSpanKind)}
-		// jpe - this almost doesn't matter and it might be more straightforward to just return this row and let the engine reject it?
 		// without this queries like { .cluster = "cluster" } fail b/c they rely on the span iterator to return something
 		spanCol.kindAsCount = true // signal to the collector we are only grabbing kind to count spans and we should not store it in attributes
 	}
 
 	// Left join here means the span id/start/end iterators + 1 are required,
 	// and all other conditions are optional. Whatever matches is returned.
-	return parquetquery.NewLeftJoinIterator(DefinitionLevelResourceSpansILSSpan, required, iters, spanCol), nil
+	return parquetquery.NewLeftJoinIterator(DefinitionLevelResourceSpansILSSpan, required, iters, spanCol), spanStartEndRetreived, nil
 }
 
 // createResourceIterator iterates through all resourcespans-level (batch-level) columns, groups them into rows representing
@@ -618,7 +662,6 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
 		required, iters, batchCol), nil
 }
 
-// jpe - modify to impose trace level time range condition
 func createTraceIterator(makeIter makeIterFn, resourceIter parquetquery.Iterator, start, end uint64) parquetquery.Iterator {
 	traceIters := make([]parquetquery.Iterator, 0, 3)
 
@@ -957,7 +1000,6 @@ type spanCollector struct {
 
 var _ parquetquery.GroupPredicate = (*spanCollector)(nil)
 
-// jpe - how do we restrict to 3 spans per trace?
 func (c *spanCollector) String() string {
 	return fmt.Sprintf("spanCollector(%d, %v)", c.minAttributes, c.durationFilters)
 }
@@ -969,7 +1011,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		RowNum:     res.RowNumber,
 	}
 
-	for _, e := range res.OtherEntries { // jpe - this is likely not going to be copied through correctly
+	for _, e := range res.OtherEntries {
 		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
 	}
 
@@ -978,13 +1020,12 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// Merge all individual columns into the span
 	for _, kv := range res.Entries {
 		switch kv.Key {
-		// jpe  move to spansetmeta collector?
-		// case columnPathSpanID:
-		// 	span.ID = kv.Value.ByteArray()
 		case columnPathSpanStartTime:
 			startTimeUnixNanos = kv.Value.Uint64()
+			span.StartTimeUnixNanos = startTimeUnixNanos
 		case columnPathSpanEndTime:
 			endTimeUnixNanos = kv.Value.Uint64()
+			span.EndtimeUnixNanos = endTimeUnixNanos
 		case columnPathSpanName:
 			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
 		//case columnPathSpanDuration:
@@ -1027,7 +1068,6 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Save computed duration if any filters present and at least one is passed.
-	//    jpe move to spansetmeta collector?
 	if len(c.durationFilters) > 0 {
 		duration := endTimeUnixNanos - startTimeUnixNanos
 		for _, f := range c.durationFilters {
@@ -1052,7 +1092,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue("span", span)
+	res.AppendOtherValue(otherEntrySpanKey, span)
 
 	return true
 }
@@ -1152,7 +1192,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue("spanset", sp)
+	res.AppendOtherValue(otherEntrySpansetKey, sp)
 
 	return true
 }
@@ -1180,7 +1220,7 @@ func (c *traceCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue("spanset", finalSpanset)
+	res.AppendOtherValue(otherEntrySpansetKey, finalSpanset)
 
 	return true
 }
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 4f6c42d7b6c..071c5db9c78 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -3,56 +3,24 @@ package vparquet
 import (
 	"context"
 	"fmt"
-	"time"
 
 	"github.com/grafana/tempo/pkg/parquetquery"
-	v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
+	pq "github.com/grafana/tempo/pkg/parquetquery"
 	"github.com/grafana/tempo/pkg/traceql"
-	"github.com/grafana/tempo/tempodb/encoding/common"
-	"github.com/segmentio/parquet-go"
 )
 
-// FetchMetadata for the given spansets from the block.
-func (b *backendBlock) FetchMetadata(ctx context.Context, ss []traceql.Spanset, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
-	pf, _, err := b.openForSearch(ctx, opts)
-	if err != nil {
-		return nil, err
-	}
-
-	return fetchMetadata(ctx, ss, pf, opts)
-}
-
-// jpe does this correclty merge the span attributes into the output metadata?
-func fetchMetadata(ctx context.Context, ss []traceql.Spanset, pf *parquet.File, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
-	rgs := pf.RowGroups() // jpe consolidate?
-	if opts.TotalPages > 0 {
-		// Read UP TO TotalPages.  The sharding calculations
-		// are just estimates, so it may not line up with the
-		// actual number of pages in this file.
-		if opts.StartPage+opts.TotalPages > len(rgs) {
-			opts.TotalPages = len(rgs) - opts.StartPage
-		}
-		rgs = rgs[opts.StartPage : opts.StartPage+opts.TotalPages]
-	}
-	makeIter := makeIterFunc(ctx, rgs, pf)
-
-	// collect rownumbers
-	rowNums := []parquetquery.RowNumber{} // jpe prealloc
-	for _, ss := range ss {
-		for _, span := range ss.Spans {
-			rowNums = append(rowNums, span.RowNum)
-		}
-	}
-
+func createSpansetMetaIterator(makeIter makeIterFn, ss *spansetIterator, spanStartEndRetreived bool) (*spansetMetadataIterator, error) {
 	// span level iterator
 	iters := make([]parquetquery.Iterator, 0, 4)
-	iters = append(iters, &rowNumberIterator{rowNumbers: rowNums})
-	iters = append(iters, makeIter(columnPathSpanStartTime, nil, columnPathSpanStartTime))
-	iters = append(iters, makeIter(columnPathSpanEndTime, nil, columnPathSpanEndTime))
+	iters = append(iters, &spansToMetaIterator{ss})
+	if !spanStartEndRetreived {
+		iters = append(iters, makeIter(columnPathSpanStartTime, nil, columnPathSpanStartTime))
+		iters = append(iters, makeIter(columnPathSpanEndTime, nil, columnPathSpanEndTime))
+	}
 	iters = append(iters, makeIter(columnPathSpanID, nil, columnPathSpanID))
-	spanIterator := parquetquery.NewJoinIterator(DefinitionLevelResourceSpans, iters, &spanCollector{})
+	spanIterator := parquetquery.NewJoinIterator(DefinitionLevelResourceSpansILSSpan, iters, &spanMetaCollector{})
 
-	// now wrap in a trace level iterator
+	// trace level iterator
 	traceIters := []parquetquery.Iterator{
 		spanIterator,
 		// Add static columns that are always return
@@ -62,119 +30,94 @@ func fetchMetadata(ctx context.Context, ss []traceql.Spanset, pf *parquet.File,
 		makeIter(columnPathRootSpanName, nil, columnPathRootSpanName),
 		makeIter(columnPathRootServiceName, nil, columnPathRootServiceName),
 	}
-	traceIter := parquetquery.NewJoinIterator(DefinitionLevelTrace, traceIters, &traceCollector{})
-
-	// jpe and return meta? should this function return an iterator as well?
-	meta := []traceql.SpansetMetadata{}
-	for {
-		res, err := traceIter.Next()
-		if res == nil {
-			break
-		}
-		if err != nil {
-			return nil, err
-		}
+	traceIter := parquetquery.NewJoinIterator(DefinitionLevelTrace, traceIters, &traceMetaCollector{})
+
+	return newSpansetMetadataIterator(traceIter), nil
+}
+
+// spansToMetaIterator operates similarly to the rowNumberIterator except it takes a spanIterator
+// and drains it. It is the bridge between the "data" iterators and "metadata" iterators
+type spansToMetaIterator struct {
+	iter *spansetIterator
+}
+
+var _ pq.Iterator = (*spansToMetaIterator)(nil)
+
+func (i *spansToMetaIterator) String() string {
+	return fmt.Sprintf("spansToMetaIterator: \n\t%s", i.iter.iter.String())
+}
+
+func (i *spansToMetaIterator) Next() (*pq.IteratorResult, error) {
+	// now go to our iterator
+	next, err := i.iter.Next()
+	if err != nil {
+		return nil, err
+	}
+	if next == nil {
+		return nil, nil
+	}
+
+	res := &pq.IteratorResult{RowNumber: next.RowNum}
+	res.AppendOtherValue(otherEntrySpanKey, next)
 
-		spansetMetaData := res.OtherEntries[0].Value.(*traceql.SpansetMetadata) // jpe one of these assertions is going to break, use key instead of index
-		meta = append(meta, *spansetMetaData)
+	return res, nil
+}
+
+func (i *spansToMetaIterator) SeekTo(to pq.RowNumber, definitionLevel int) (*pq.IteratorResult, error) {
+	var at *pq.IteratorResult
+
+	for at, _ = i.Next(); i != nil && at != nil && pq.CompareRowNumbers(definitionLevel, at.RowNumber, to) < 0; {
+		at, _ = i.Next()
 	}
 
-	return meta, nil
+	return at, nil
 }
 
-// This turns groups of span values into Span objects
+func (i *spansToMetaIterator) Close() {
+	i.iter.iter.Close()
+}
+
+// spanMetaCollector collects iterator results with the expectation that they were created
+// using the iterators defined above
 type spanMetaCollector struct {
-	minAttributes   int                                     // jpe wut
-	durationFilters []*parquetquery.GenericPredicate[int64] // jpe wut
 }
 
 var _ parquetquery.GroupPredicate = (*spanMetaCollector)(nil)
 
 func (c *spanMetaCollector) String() string {
-	return fmt.Sprintf("spanMetaCollector(%d, %v)", c.minAttributes, c.durationFilters)
+	return fmt.Sprintf("spanMetaCollector()")
 }
 
-// jpe - how do we restrict to 3 spans per trace?
 func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
-
-	span := &traceql.SpanMetadata{
-		Attributes: make(map[traceql.Attribute]traceql.Static),
+	// extract the span from the iterator result and steal it's attributes
+	// this is where we convert a traceql.Span to a traceql.SpanMetadata
+	span, ok := res.OtherValueFromKey(otherEntrySpanKey).(*traceql.Span)
+	if !ok {
+		return false // something very wrong happened. should we panic?
 	}
 
-	for _, e := range res.OtherEntries { // jpe - this is likely not going to be copied through correctly
-		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
+	spanMetadata := &traceql.SpanMetadata{
+		StartTimeUnixNanos: span.StartTimeUnixNanos,
+		EndtimeUnixNanos:   span.EndtimeUnixNanos,
+		Attributes:         span.Attributes,
 	}
 
-	// Merge all individual columns into the span
+	// span start/end time may come from span attributes or it may come from
+	// the iterator results. if we find it in the iterator results, use that
 	for _, kv := range res.Entries {
 		switch kv.Key {
 		case columnPathSpanID:
-			span.ID = kv.Value.ByteArray()
+			spanMetadata.ID = kv.Value.ByteArray()
 		case columnPathSpanStartTime:
-			span.StartTimeUnixNanos = kv.Value.Uint64()
+			spanMetadata.StartTimeUnixNanos = kv.Value.Uint64()
 		case columnPathSpanEndTime:
-			span.EndtimeUnixNanos = kv.Value.Uint64()
-		case columnPathSpanName:
-			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
-		//case columnPathSpanDuration:
-		//	span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(kv.Value.Uint64()))
-		case columnPathSpanStatusCode:
-			// Map OTLP status code back to TraceQL enum.
-			// For other values, use the raw integer.
-			var status traceql.Status
-			switch kv.Value.Uint64() {
-			case uint64(v1.Status_STATUS_CODE_UNSET):
-				status = traceql.StatusUnset
-			case uint64(v1.Status_STATUS_CODE_OK):
-				status = traceql.StatusOk
-			case uint64(v1.Status_STATUS_CODE_ERROR):
-				status = traceql.StatusError
-			default:
-				status = traceql.Status(kv.Value.Uint64())
-			}
-			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
-		default:
-			// TODO - This exists for span-level dedicated columns like http.status_code
-			// Are nils possible here?
-			switch kv.Value.Kind() {
-			case parquet.Boolean:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticBool(kv.Value.Boolean())
-			case parquet.Int32, parquet.Int64:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticInt(int(kv.Value.Int64()))
-			case parquet.Float:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticFloat(kv.Value.Double())
-			case parquet.ByteArray:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticString(kv.Value.String())
-			}
-		}
-	}
-
-	// Save computed duration if any filters present and at least one is passed.
-	if len(c.durationFilters) > 0 {
-		duration := span.EndtimeUnixNanos - span.StartTimeUnixNanos
-		for _, f := range c.durationFilters {
-			if f == nil || f.Fn(int64(duration)) {
-				span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(duration))
-				break
-			}
-		}
-	}
-
-	if c.minAttributes > 0 {
-		count := 0
-		for _, v := range span.Attributes {
-			if v.Type != traceql.TypeNil {
-				count++
-			}
-		}
-		if count < c.minAttributes {
-			return false
+			spanMetadata.EndtimeUnixNanos = kv.Value.Uint64()
 		}
 	}
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue("span", span)
+	res.AppendOtherValue(otherEntrySpanKey, spanMetadata)
 
 	return true
 }
@@ -209,17 +152,53 @@ func (c *traceMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		}
 	}
 
+	// we're copying spans directly from the spanMetaIterator into the traceMetaIterator
+	//  this skips the step of the batchIterator present on the normal fetch path
 	for _, e := range res.OtherEntries {
-		if spanset, ok := e.Value.(*traceql.SpansetMetadata); ok {
-			finalSpanset.Spans = append(finalSpanset.Spans, spanset.Spans...)
+		if span, ok := e.Value.(*traceql.SpanMetadata); ok {
+			finalSpanset.Spans = append(finalSpanset.Spans, *span)
 		}
 	}
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue("spanset", finalSpanset)
+	res.AppendOtherValue(otherEntrySpansetKey, finalSpanset)
 
 	return true
 }
 
-// jpe - how to marry in attribute data?
+// spansetMetadataIterator turns the parquet iterator into the final
+// traceql iterator.  Every row it receives is one spanset.
+type spansetMetadataIterator struct {
+	iter parquetquery.Iterator
+}
+
+var _ traceql.SpansetIterator = (*spansetMetadataIterator)(nil)
+
+func newSpansetMetadataIterator(iter parquetquery.Iterator) *spansetMetadataIterator {
+	return &spansetMetadataIterator{
+		iter: iter,
+	}
+}
+
+func (i *spansetMetadataIterator) Next(ctx context.Context) (*traceql.SpansetMetadata, error) {
+	res, err := i.iter.Next()
+	if err != nil {
+		return nil, err
+	}
+	if res == nil {
+		return nil, nil
+	}
+
+	// The spanset is in the OtherEntries
+	iface := res.OtherValueFromKey(otherEntrySpansetKey)
+	if iface == nil {
+		return nil, fmt.Errorf("engine assumption broken: spanset not found in other entries")
+	}
+	ss, ok := iface.(*traceql.SpansetMetadata)
+	if !ok {
+		return nil, fmt.Errorf("engine assumption broken: spanset is not of type *traceql.Spanset")
+	}
+
+	return ss, nil
+}
diff --git a/tempodb/encoding/vparquet/block_traceql_meta_test.go b/tempodb/encoding/vparquet/block_traceql_meta_test.go
new file mode 100644
index 00000000000..35cfe4d0eb6
--- /dev/null
+++ b/tempodb/encoding/vparquet/block_traceql_meta_test.go
@@ -0,0 +1,254 @@
+package vparquet
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/grafana/tempo/pkg/traceql"
+	"github.com/grafana/tempo/tempodb/encoding/common"
+	"github.com/stretchr/testify/require"
+)
+
+func TestBackendBlockSearchFetchMetaData(t *testing.T) {
+	wantTr := fullyPopulatedTestTrace(nil)
+	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
+	ctx := context.Background()
+
+	// Helper functions to make requests
+
+	makeSpansets := func(sets ...*traceql.SpansetMetadata) []*traceql.SpansetMetadata {
+		return sets
+	}
+
+	makeSpanset := func(traceID []byte, rootSpanName, rootServiceName string, startTimeUnixNano, durationNanos uint64, spans ...traceql.SpanMetadata) *traceql.SpansetMetadata {
+		return &traceql.SpansetMetadata{
+			TraceID:            traceID,
+			RootSpanName:       rootSpanName,
+			RootServiceName:    rootServiceName,
+			StartTimeUnixNanos: startTimeUnixNano,
+			DurationNanos:      durationNanos,
+			Spans:              spans,
+		}
+	}
+
+	testCases := []struct {
+		req             traceql.FetchSpansRequest
+		expectedResults []*traceql.SpansetMetadata
+	}{
+		{
+			// Empty request returns 1 spanset with all spans
+			traceql.FetchSpansRequest{},
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes:         map[traceql.Attribute]traceql.Static{},
+					},
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes:         map[traceql.Attribute]traceql.Static{},
+					},
+				),
+			),
+		},
+		{
+			// Span attributes lookup
+			// Only matches 1 condition. Returns span but only attributes that matched
+			makeReq(
+				parse(t, `{span.foo = "bar"}`), // matches resource but not span
+				parse(t, `{span.bar = 123}`),   // matches
+			),
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							// foo not returned because the span didn't match it
+							traceql.NewScopedAttribute(traceql.AttributeScopeSpan, false, "bar"): traceql.NewStaticInt(123),
+						},
+					},
+				),
+			),
+		},
+
+		{
+			// Resource attributes lookup
+			makeReq(
+				parse(t, `{resource.foo = "abc"}`), // matches resource but not span
+			),
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							// Foo matched on resource.
+							// TODO - This seems misleading since the span has foo=<something else>
+							//        but for this query we never even looked at span attribute columns.
+							newResAttr("foo"): traceql.NewStaticString("abc"),
+						},
+					},
+				),
+			),
+		},
+
+		{
+			// Multiple attributes, only 1 matches and is returned
+			makeReq(
+				parse(t, `{.foo = "xyz"}`),                   // doesn't match anything
+				parse(t, `{.`+LabelHTTPStatusCode+` = 500}`), // matches span
+			),
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500), // This is the only attribute that matched anything
+						},
+					},
+				),
+			),
+		},
+
+		{
+			// Project attributes of all types
+			makeReq(
+				parse(t, `{.foo }`),                    // String
+				parse(t, `{.`+LabelHTTPStatusCode+`}`), // Int
+				parse(t, `{.float }`),                  // Float
+				parse(t, `{.bool }`),                   // bool
+			),
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							newResAttr("foo"):                traceql.NewStaticString("abc"), // Both are returned
+							newSpanAttr("foo"):               traceql.NewStaticString("def"), // Both are returned
+							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500),
+							newSpanAttr("float"):             traceql.NewStaticFloat(456.78),
+							newSpanAttr("bool"):              traceql.NewStaticBool(false),
+						},
+					},
+				),
+			),
+		},
+
+		{
+			// doesn't match anything
+			makeReq(parse(t, `{.xyz = "xyz"}`)),
+			nil,
+		},
+
+		{
+			// Intrinsics. 2nd span only
+			makeReq(
+				parse(t, `{ name = "world" }`),
+				parse(t, `{ status = unset }`),
+			),
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							traceql.NewIntrinsic(traceql.IntrinsicName):   traceql.NewStaticString("world"),
+							traceql.NewIntrinsic(traceql.IntrinsicStatus): traceql.NewStaticStatus(traceql.StatusUnset),
+						},
+					},
+				),
+			),
+		},
+		{
+			// Intrinsic duration with no filtering
+			traceql.FetchSpansRequest{Conditions: []traceql.Condition{{Attribute: traceql.NewIntrinsic(traceql.IntrinsicDuration)}}},
+			makeSpansets(
+				makeSpanset(
+					wantTr.TraceID,
+					wantTr.RootSpanName,
+					wantTr.RootServiceName,
+					wantTr.StartTimeUnixNano,
+					wantTr.DurationNanos,
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(100 * time.Second),
+						},
+					},
+					traceql.SpanMetadata{
+						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						Attributes: map[traceql.Attribute]traceql.Static{
+							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(0 * time.Second),
+						},
+					},
+				),
+			),
+		},
+	}
+
+	for _, tc := range testCases {
+		req := tc.req
+		resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
+		require.NoError(t, err, "search request:", req)
+
+		// Turn iterator into slice
+		var ss []*traceql.SpansetMetadata
+		for {
+			spanSet, err := resp.Results.Next(ctx)
+			require.NoError(t, err)
+			if spanSet == nil {
+				break
+			}
+			ss = append(ss, spanSet)
+		}
+
+		require.Equal(t, tc.expectedResults, ss, "search request:", req)
+	}
+}
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 6d92ece9be0..d8ff21dbc71 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -24,7 +24,7 @@ func TestOne(t *testing.T) {
 	wantTr := fullyPopulatedTestTrace(nil)
 	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
 	ctx := context.Background()
-	req := traceql.MustExtractFetchSpansRequest(`{ true }`)
+	req := traceql.MustExtractFetchSpansRequest(`{ span.foo = "bar" || duration > 1s }`)
 
 	req.StartTimeUnixNanos = uint64(1000 * time.Second)
 	req.EndTimeUnixNanos = uint64(1001 * time.Second)
@@ -38,7 +38,7 @@ func TestOne(t *testing.T) {
 	fmt.Println("-----------")
 	fmt.Println(req.Query)
 	fmt.Println("-----------")
-	fmt.Println(resp.Results.(*spansetIterator).iter)
+	fmt.Println(resp.Results.(*spansetMetadataIterator).iter)
 	fmt.Println("-----------")
 	fmt.Println(spanSet)
 }
@@ -278,217 +278,6 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 	}
 }
 
-func TestBackendBlockSearchTraceQLResults(t *testing.T) {
-	wantTr := fullyPopulatedTestTrace(nil)
-	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
-	ctx := context.Background()
-
-	// Helper functions to make requests
-
-	makeSpansets := func(sets ...traceql.Spanset) []traceql.Spanset {
-		return sets
-	}
-
-	makeSpanset := func(traceID []byte, rootSpanName, rootServiceName string, startTimeUnixNano, durationNanos uint64, spans ...traceql.Span) traceql.Spanset {
-		return traceql.Spanset{
-			Spans: spans,
-		}
-	}
-
-	testCases := []struct {
-		req             traceql.FetchSpansRequest
-		expectedResults []traceql.Spanset
-	}{
-		{
-			// Span attributes lookup
-			// Only matches 1 condition. Returns span but only attributes that matched
-			makeReq(
-				parse(t, `{span.foo = "bar"}`), // matches resource but not span
-				parse(t, `{span.bar = 123}`),   // matches
-			),
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							// foo not returned because the span didn't match it
-							traceql.NewScopedAttribute(traceql.AttributeScopeSpan, false, "bar"): traceql.NewStaticInt(123),
-						},
-					},
-				),
-			),
-		},
-
-		{
-			// Resource attributes lookup
-			makeReq(
-				parse(t, `{resource.foo = "abc"}`), // matches resource but not span
-			),
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							// Foo matched on resource.
-							// TODO - This seems misleading since the span has foo=<something else>
-							//        but for this query we never even looked at span attribute columns.
-							newResAttr("foo"): traceql.NewStaticString("abc"),
-						},
-					},
-				),
-			),
-		},
-
-		{
-			// Multiple attributes, only 1 matches and is returned
-			makeReq(
-				parse(t, `{.foo = "xyz"}`),                   // doesn't match anything
-				parse(t, `{.`+LabelHTTPStatusCode+` = 500}`), // matches span
-			),
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500), // This is the only attribute that matched anything
-						},
-					},
-				),
-			),
-		},
-
-		{
-			// Project attributes of all types
-			makeReq(
-				parse(t, `{.foo }`),                    // String
-				parse(t, `{.`+LabelHTTPStatusCode+`}`), // Int
-				parse(t, `{.float }`),                  // Float
-				parse(t, `{.bool }`),                   // bool
-			),
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							newResAttr("foo"):                traceql.NewStaticString("abc"), // Both are returned
-							newSpanAttr("foo"):               traceql.NewStaticString("def"), // Both are returned
-							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500),
-							newSpanAttr("float"):             traceql.NewStaticFloat(456.78),
-							newSpanAttr("bool"):              traceql.NewStaticBool(false),
-						},
-					},
-				),
-			),
-		},
-
-		{
-			// doesn't match anything
-			makeReq(parse(t, `{.xyz = "xyz"}`)),
-			nil,
-		},
-
-		{
-			// Empty request returns 1 spanset with all spans
-			traceql.FetchSpansRequest{},
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{},
-					},
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{},
-					},
-				),
-			),
-		},
-
-		{
-			// Intrinsics. 2nd span only
-			makeReq(
-				parse(t, `{ name = "world" }`),
-				parse(t, `{ status = unset }`),
-			),
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							traceql.NewIntrinsic(traceql.IntrinsicName):   traceql.NewStaticString("world"),
-							traceql.NewIntrinsic(traceql.IntrinsicStatus): traceql.NewStaticStatus(traceql.StatusUnset),
-						},
-					},
-				),
-			),
-		},
-		{
-			// Intrinsic duration with no filtering
-			traceql.FetchSpansRequest{Conditions: []traceql.Condition{{Attribute: traceql.NewIntrinsic(traceql.IntrinsicDuration)}}},
-			makeSpansets(
-				makeSpanset(
-					wantTr.TraceID,
-					wantTr.RootSpanName,
-					wantTr.RootServiceName,
-					wantTr.StartTimeUnixNano,
-					wantTr.DurationNanos,
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(100 * time.Second),
-						},
-					},
-					traceql.Span{
-						Attributes: map[traceql.Attribute]traceql.Static{
-							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(0 * time.Second),
-						},
-					},
-				),
-			),
-		},
-	}
-
-	for _, tc := range testCases {
-		req := tc.req
-		resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
-		require.NoError(t, err, "search request:", req)
-
-		// Turn iterator into slice
-		var actualResults []traceql.Spanset
-		for {
-			spanSet, err := resp.Results.Next(ctx)
-			require.NoError(t, err)
-			if spanSet == nil {
-				break
-			}
-			actualResults = append(actualResults, *spanSet)
-		}
-		require.Equal(t, tc.expectedResults, actualResults, "search request:", req)
-	}
-}
-
 func makeReq(conditions ...traceql.Condition) traceql.FetchSpansRequest {
 	return traceql.FetchSpansRequest{
 		Conditions: conditions,
@@ -623,17 +412,17 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		name string
 		req  traceql.FetchSpansRequest
 	}{
-		{"noMatch", traceql.MustExtractFetchSpansRequest("{ span.foo = `bar` }")},
+		{"noMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
 		{"partialMatch", traceql.MustExtractFetchSpansRequest("{ .foo = `bar` && .component = `gRPC` }")},
 		{"service.name", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `a` }")},
 	}
 
 	ctx := context.TODO()
 	tenantID := "1"
-	blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
+	blockID := uuid.MustParse("13a1cd39-90c8-4c6d-a939-1f504ecffcd0")
 
 	r, _, _, err := local.New(&local.Config{
-		Path: path.Join("/Users/marty/src/tmp/"),
+		Path: path.Join("/tmp/testblock/"),
 	})
 	require.NoError(b, err)
 
diff --git a/tempodb/encoding/vparquet/wal_block.go b/tempodb/encoding/vparquet/wal_block.go
index 20a9e9b676b..fc9733fc12b 100644
--- a/tempodb/encoding/vparquet/wal_block.go
+++ b/tempodb/encoding/vparquet/wal_block.go
@@ -570,7 +570,7 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, opt
 	}
 
 	pages := b.readFlushes()
-	iters := make([]*spansetIterator, 0, len(pages))
+	iters := make([]traceql.SpansetIterator, 0, len(pages))
 	for _, page := range pages {
 		file, err := page.file()
 		if err != nil {
@@ -592,26 +592,6 @@ func (b *walBlock) Fetch(ctx context.Context, req traceql.FetchSpansRequest, opt
 	}, nil
 }
 
-func (b *walBlock) FetchMetadata(ctx context.Context, ss []traceql.Spanset, opts common.SearchOptions) ([]traceql.SpansetMetadata, error) {
-	pages := b.readFlushes()
-	allMeta := []traceql.SpansetMetadata{}
-	for _, page := range pages {
-		file, err := page.file()
-		if err != nil {
-			return nil, fmt.Errorf("error opening file %s: %w", page.path, err)
-		}
-
-		meta, err := fetchMetadata(ctx, ss, file, opts) // jpe this is wrong. it's using row numbers to match spans, but that won't work across the wal files
-		if err != nil {
-			return nil, errors.Wrap(err, "creating fetch iter")
-		}
-		allMeta = append(allMeta, meta...)
-	}
-
-	// combine iters?
-	return allMeta, nil
-}
-
 func (b *walBlock) walPath() string {
 	filename := fmt.Sprintf("%s+%s+%s", b.meta.BlockID, b.meta.TenantID, VersionString)
 	return filepath.Join(b.path, filename)
diff --git a/tempodb/wal/wal_test.go b/tempodb/wal/wal_test.go
index 444c1cc8a41..e352a79f7b8 100644
--- a/tempodb/wal/wal_test.go
+++ b/tempodb/wal/wal_test.go
@@ -266,6 +266,7 @@ func testSearch(t *testing.T, e encoding.VersionedEncoding) {
 	})
 }
 
+// jpe review this test, make sure filters and everything is working for vparquet
 func TestFetch(t *testing.T) {
 	for _, e := range encoding.AllEncodings() {
 		t.Run(e.Version(), func(t *testing.T) {
@@ -294,17 +295,17 @@ func testFetch(t *testing.T, e encoding.VersionedEncoding) {
 			// grab the first result
 			ss, err := resp.Results.Next(ctx)
 			require.NoError(t, err)
+			require.NotNil(t, ss)
+
+			// confirm traceid matches
+			expectedID := ids[i]
+			require.Len(t, ss, 1)
+			require.Equal(t, ss.TraceID, expectedID)
 
 			// confirm no more matches
 			ss, err = resp.Results.Next(ctx)
 			require.NoError(t, err)
 			require.Nil(t, ss)
-
-			// confirm traceid matches
-			ssmeta, err := block.FetchMetadata(ctx, []traceql.Spanset{*ss}, common.DefaultSearchOptions())
-			expectedID := ids[i]
-			require.Equal(t, 1, len(ssmeta))
-			require.Equal(t, ssmeta[0].TraceID, expectedID)
 		}
 	})
 }

From 5d13894492d53119a7f12d3744a146c1602baabe Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 15 Feb 2023 05:49:16 -0500
Subject: [PATCH 08/27] test finagling

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql_test.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index d8ff21dbc71..b4095defa82 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -412,17 +412,17 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		name string
 		req  traceql.FetchSpansRequest
 	}{
-		{"noMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
+		//		{"noMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
 		{"partialMatch", traceql.MustExtractFetchSpansRequest("{ .foo = `bar` && .component = `gRPC` }")},
-		{"service.name", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `a` }")},
+		//		{"service.name", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `a` }")},
 	}
 
 	ctx := context.TODO()
 	tenantID := "1"
-	blockID := uuid.MustParse("13a1cd39-90c8-4c6d-a939-1f504ecffcd0")
+	blockID := uuid.MustParse("149e41d2-cc4d-4f71-b355-3377eabc94c8")
 
 	r, _, _, err := local.New(&local.Config{
-		Path: path.Join("/tmp/testblock/"),
+		Path: path.Join("/home/joe/testblock/"),
 	})
 	require.NoError(b, err)
 

From 998b37608b531f8a8ba0e5ce6bb7bbd92e7a3ac4 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 15 Feb 2023 11:10:37 -0500
Subject: [PATCH 09/27] extend benchmarks

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 .../encoding/vparquet/block_traceql_test.go   | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index b4095defa82..20978f0c564 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -412,9 +412,24 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		name string
 		req  traceql.FetchSpansRequest
 	}{
-		//		{"noMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
-		{"partialMatch", traceql.MustExtractFetchSpansRequest("{ .foo = `bar` && .component = `gRPC` }")},
-		//		{"service.name", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `a` }")},
+		// span
+		{"spanAttNameNoMatch", traceql.MustExtractFetchSpansRequest("{ span.foo = `bar` }")},
+		{"spanAttValNoMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
+		{"spanAttValMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom > 0 }")},
+		{"spanAttIntrinsicNoMatch", traceql.MustExtractFetchSpansRequest("{ span.name = `asdfasdf` }")},
+		{"spanAttIntrinsicMatch", traceql.MustExtractFetchSpansRequest("{ span.name = `gcs.ReadRange` }")},
+
+		// resource
+		{"resourceAttNameNoMatch", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` }")},
+		{"resourceAttValNoMatch", traceql.MustExtractFetchSpansRequest("{ resource.module.path = `bar` }")},
+		{"resourceAttValMatch", traceql.MustExtractFetchSpansRequest("{ resource.os.type = `linux` }")},
+		{"resourceAttIntrinsicNoMatch", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `a` }")},
+		{"resourceAttIntrinsicMatch", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `tempo-query-frontend` }")},
+
+		// mixed
+		{"mixedNameNoMatch", traceql.MustExtractFetchSpansRequest("{ .foo = `bar` }")},
+		{"mixedValNoMatch", traceql.MustExtractFetchSpansRequest("{ .bloom = `bar` }")},
+		{"mixedValMixedMatch", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` && span.name = `gcs.ReadRange` }")},
 	}
 
 	ctx := context.TODO()

From a603824c21f4c3f827457a44dcab25a0c02c8774 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 21 Feb 2023 09:29:55 -0500
Subject: [PATCH 10/27] test cleanup

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/parquetquery/iters.go                     |  2 +-
 pkg/traceql/engine.go                         |  4 +-
 pkg/traceql/engine_test.go                    |  6 +--
 pkg/traceql/storage.go                        | 10 +++-
 .../encoding/vparquet/block_findtracebyid.go  |  3 --
 tempodb/encoding/vparquet/block_traceql.go    | 15 +++---
 .../encoding/vparquet/block_traceql_test.go   | 47 +++++++++++++++----
 tempodb/wal/wal_test.go                       |  1 -
 8 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/pkg/parquetquery/iters.go b/pkg/parquetquery/iters.go
index 3a4aa1dfd29..506069734b2 100644
--- a/pkg/parquetquery/iters.go
+++ b/pkg/parquetquery/iters.go
@@ -598,7 +598,7 @@ func (j *JoinIterator) String() string {
 	for _, iter := range j.iters {
 		iters += "\n\t" + util.TabOut(iter)
 	}
-	return fmt.Sprintf("JoinIterator: %d\n\t%s\n%s)", j.definitionLevel, iters, j.pred)
+	return fmt.Sprintf("JoinIterator: %d\t%s\n%s)", j.definitionLevel, iters, j.pred)
 }
 
 func (j *JoinIterator) Next() (*IteratorResult, error) {
diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index a4a2991a555..2eb314bc9fa 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -84,7 +84,7 @@ func (e *Engine) Execute(ctx context.Context, searchReq *tempopb.SearchRequest,
 			span.LogKV("msg", "iterator.Next", "err", err)
 			return nil, err
 		}
-		res.Traces = append(res.Traces, e.asTraceSearchMetadata(*spanset)) // jpe pointer <> val conversion
+		res.Traces = append(res.Traces, e.asTraceSearchMetadata(spanset))
 
 		if len(res.Traces) >= int(searchReq.Limit) && searchReq.Limit > 0 {
 			break
@@ -121,7 +121,7 @@ func (e *Engine) createFetchSpansRequest(searchReq *tempopb.SearchRequest, pipel
 	return req
 }
 
-func (e *Engine) asTraceSearchMetadata(spanset SpansetMetadata) *tempopb.TraceSearchMetadata {
+func (e *Engine) asTraceSearchMetadata(spanset *SpansetMetadata) *tempopb.TraceSearchMetadata {
 	metadata := &tempopb.TraceSearchMetadata{
 		TraceID:           util.TraceIDToHexString(spanset.TraceID),
 		RootServiceName:   spanset.RootServiceName,
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index a27a162db72..59981200f6d 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -144,7 +144,7 @@ func TestEngine_asTraceSearchMetadata(t *testing.T) {
 	spanID1 := traceID[:8]
 	spanID2 := traceID[8:]
 
-	spanSet := SpansetMetadata{
+	spanSet := &SpansetMetadata{
 		TraceID:            traceID,
 		RootServiceName:    "my-service",
 		RootSpanName:       "HTTP GET",
@@ -289,12 +289,12 @@ func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error
 			return nil, nil
 		}
 
-		r.Spans = r.Spans[len(ss):] // jpe this is horrible - really should match spans passed by m.filter and only pass that metadata
+		r.Spans = r.Spans[len(ss):]
 		return r, nil
 	}
 }
 
-func spansetFromMetaData(meta *SpansetMetadata) Spanset { // jpe weird pointer conversion
+func spansetFromMetaData(meta *SpansetMetadata) Spanset {
 	ret := Spanset{}
 	for _, s := range meta.Spans {
 		s := Span{
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index a742fd81aeb..802a36b0be2 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -40,6 +40,8 @@ type FetchSpansRequest struct {
 	// spans before the span metadata is fetched, but after the data requested
 	// in the Conditions is fetched. If this is unset then all metadata
 	// for all matching spansets is returned.
+	// If this is set it must be called by the storage layer even if there is
+	// no opportunity to pull metadata independently of span data.
 	Filter FilterSpans
 }
 
@@ -47,11 +49,15 @@ func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 	f.Conditions = append(f.Conditions, c...)
 }
 
-type Span struct { // jpe make this an interface?
+type Span struct {
+	// these metadata fields are used by the storage layer to fetch spans
+	// todo: make this an interface so that the storage layer can track this info w/o muddying the engine
 	RowNum             parquetquery.RowNumber
 	StartTimeUnixNanos uint64
 	EndtimeUnixNanos   uint64
-	Attributes         map[Attribute]Static
+
+	// these are the actual fields used by the engine to evaluate queries
+	Attributes map[Attribute]Static
 }
 
 type Spanset struct {
diff --git a/tempodb/encoding/vparquet/block_findtracebyid.go b/tempodb/encoding/vparquet/block_findtracebyid.go
index 8e782eb4d29..e30f73767c6 100644
--- a/tempodb/encoding/vparquet/block_findtracebyid.go
+++ b/tempodb/encoding/vparquet/block_findtracebyid.go
@@ -178,9 +178,6 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
 		return nil, nil
 	}
 
-	// jpe - two pass search might break due to this. confirm by writing a test that searches for spans in
-	//  row groups besides the first one.
-
 	// The row number coming out of the iterator is relative,
 	// so offset it using the num rows in all previous groups
 	rowMatch := int64(0)
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index e2c0759f03b..d5fe4d533fb 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -175,8 +175,6 @@ type spansetIterator struct {
 	currentSpans []*traceql.Span
 }
 
-//var _ pq.Iterator = (*spansetIterator)(nil) jpe do i want to do an pq.Iterator? or just leave the types
-
 func newSpansetIterator(iter parquetquery.Iterator, filter traceql.FilterSpans) *spansetIterator {
 	return &spansetIterator{
 		iter:   iter,
@@ -211,21 +209,24 @@ func (i *spansetIterator) Next() (*traceql.Span, error) {
 			return nil, fmt.Errorf("engine assumption broken: spanset is not of type *traceql.Spanset")
 		}
 
+		// TODO: the engine wants to work with value types, but the fetch layer is using pointers. as a result
+		//  there is some gross "bridge" code here that converts between the two. We should write benchmarks
+		//  and determine if moving to or the other is worth it
 		var filteredSpansets []traceql.Spanset
 		if i.filter != nil {
-			filteredSpansets, err = i.filter(*spanset) // jpe pointer/val juggling
+			filteredSpansets, err = i.filter(*spanset)
 			if err != nil {
 				return nil, err
 			}
 		} else {
-			filteredSpansets = []traceql.Spanset{*spanset} // jpe pointer/val juggling
+			filteredSpansets = []traceql.Spanset{*spanset}
 		}
 
 		// flatten spans into i.currentSpans
 		for _, ss := range filteredSpansets {
 			for _, s := range ss.Spans {
 				span := s
-				i.currentSpans = append(i.currentSpans, &span) // jpe pointer/val juggling
+				i.currentSpans = append(i.currentSpans, &span)
 			}
 		}
 
@@ -265,10 +266,6 @@ func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.SpansetMetada
 	return spanset, nil
 }
 
-// jpe - review iterators and how they are built here. there may be subtle implications on the span/attribute levels
-// imposed here that are no longer valid b/c of the two pass change. i.e. we no longer have to always grab span level
-// data.
-
 // fetch is the core logic for executing the given conditions against the parquet columns. The algorithm
 // can be summarized as a hiearchy of iterators where we iterate related columns together and collect the results
 // at each level into attributes, spans, and spansets.  Each condition (.foo=bar) is pushed down to the one or more
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 20978f0c564..2b20224f71a 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -1,8 +1,10 @@
 package vparquet
 
 import (
+	"bytes"
 	"context"
 	"fmt"
+	"math/rand"
 	"path"
 	"testing"
 	"time"
@@ -44,8 +46,22 @@ func TestOne(t *testing.T) {
 }
 
 func TestBackendBlockSearchTraceQL(t *testing.T) {
-	wantTr := fullyPopulatedTestTrace(nil)
-	b := makeBackendBlockWithTraces(t, []*Trace{wantTr})
+	numTraces := 250
+	traces := make([]*Trace, 0, numTraces)
+	wantTraceIdx := rand.Intn(numTraces)
+	wantTraceID := test.ValidTraceID(nil)
+	for i := 0; i < numTraces; i++ {
+		if i == wantTraceIdx {
+			traces = append(traces, fullyPopulatedTestTrace(wantTraceID))
+			continue
+		}
+
+		id := test.ValidTraceID(nil)
+		tr := traceToParquet(id, test.MakeTrace(1, id), nil)
+		traces = append(traces, tr)
+	}
+
+	b := makeBackendBlockWithTraces(t, traces)
 	ctx := context.Background()
 
 	searchesThatMatch := []traceql.FetchSpansRequest{
@@ -183,9 +199,19 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 		resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
 		require.NoError(t, err, "search request:", req)
 
-		spanSet, err := resp.Results.Next(ctx)
-		require.NoError(t, err, "search request:", req)
-		require.NotNil(t, spanSet, "search request:", req)
+		found := false
+		for {
+			spanSet, err := resp.Results.Next(ctx)
+			require.NoError(t, err, "search request:", req)
+			if spanSet == nil {
+				break
+			}
+			found = bytes.Equal(spanSet.TraceID, wantTraceID)
+			if found {
+				break
+			}
+		}
+		require.True(t, found, "search request:", req)
 	}
 
 	searchesThatDontMatch := []traceql.FetchSpansRequest{
@@ -272,9 +298,14 @@ func TestBackendBlockSearchTraceQL(t *testing.T) {
 		resp, err := b.Fetch(ctx, req, common.DefaultSearchOptions())
 		require.NoError(t, err, "search request:", req)
 
-		spanSet, err := resp.Results.Next(ctx)
-		require.NoError(t, err, "search request:", req)
-		require.Nil(t, spanSet, "search request:", req)
+		for {
+			spanSet, err := resp.Results.Next(ctx)
+			require.NoError(t, err, "search request:", req)
+			if spanSet == nil {
+				break
+			}
+			require.NotEqual(t, wantTraceID, spanSet.TraceID, "search request:", req)
+		}
 	}
 }
 
diff --git a/tempodb/wal/wal_test.go b/tempodb/wal/wal_test.go
index e352a79f7b8..ec8bca4f02a 100644
--- a/tempodb/wal/wal_test.go
+++ b/tempodb/wal/wal_test.go
@@ -266,7 +266,6 @@ func testSearch(t *testing.T, e encoding.VersionedEncoding) {
 	})
 }
 
-// jpe review this test, make sure filters and everything is working for vparquet
 func TestFetch(t *testing.T) {
 	for _, e := range encoding.AllEncodings() {
 		t.Run(e.Version(), func(t *testing.T) {

From 100ce1144f911e4da09bce77a7d27446e211f451 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 21 Feb 2023 12:17:37 -0500
Subject: [PATCH 11/27] removed query field

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/storage.go                          | 2 --
 tempodb/encoding/vparquet/block_traceql_test.go | 2 --
 2 files changed, 4 deletions(-)

diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index 802a36b0be2..38793ac0aaf 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -22,7 +22,6 @@ type Condition struct {
 type FilterSpans func(Spanset) ([]Spanset, error)
 
 type FetchSpansRequest struct {
-	Query              string
 	StartTimeUnixNanos uint64
 	EndTimeUnixNanos   uint64
 	Conditions         []Condition
@@ -113,7 +112,6 @@ func ExtractFetchSpansRequest(query string) (FetchSpansRequest, error) {
 	}
 
 	req := FetchSpansRequest{
-		Query:         query,
 		AllConditions: true,
 	}
 
diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 2b20224f71a..1c6004f1056 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -37,8 +37,6 @@ func TestOne(t *testing.T) {
 	spanSet, err := resp.Results.Next(ctx)
 	require.NoError(t, err, "search request:", req)
 
-	fmt.Println("-----------")
-	fmt.Println(req.Query)
 	fmt.Println("-----------")
 	fmt.Println(resp.Results.(*spansetMetadataIterator).iter)
 	fmt.Println("-----------")

From 04b4ba0c6fc87b3e163f928f66d11bd191c3705b Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 21 Feb 2023 12:24:41 -0500
Subject: [PATCH 12/27] lint

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/parquetquery/predicates.go                |  2 +-
 pkg/traceql/engine_test.go                    | 30 +++++++++----------
 tempodb/encoding/vparquet/block_traceql.go    |  2 +-
 .../encoding/vparquet/block_traceql_meta.go   |  2 +-
 4 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/pkg/parquetquery/predicates.go b/pkg/parquetquery/predicates.go
index 8f3aef7c43d..8a3819412c7 100644
--- a/pkg/parquetquery/predicates.go
+++ b/pkg/parquetquery/predicates.go
@@ -267,7 +267,7 @@ func NewGenericPredicate[T any](fn func(T) bool, rangeFn func(T, T) bool, extrac
 }
 
 func (p *GenericPredicate[T]) String() string {
-	return fmt.Sprintf("GenericPredicate{}")
+	return "GenericPredicate{}"
 }
 
 func (p *GenericPredicate[T]) KeepColumnChunk(c pq.ColumnChunk) bool {
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index 59981200f6d..199fcd491b8 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -274,24 +274,22 @@ type MockSpanSetIterator struct {
 }
 
 func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error) {
-	for {
-		if len(m.results) == 0 {
-			return nil, nil
-		}
-		r := m.results[0]
-		m.results = m.results[1:]
-
-		ss, err := m.filter(spansetFromMetaData(r))
-		if err != nil {
-			return nil, err
-		}
-		if len(ss) == 0 {
-			return nil, nil
-		}
+	if len(m.results) == 0 {
+		return nil, nil
+	}
+	r := m.results[0]
+	m.results = m.results[1:]
 
-		r.Spans = r.Spans[len(ss):]
-		return r, nil
+	ss, err := m.filter(spansetFromMetaData(r))
+	if err != nil {
+		return nil, err
+	}
+	if len(ss) == 0 {
+		return nil, nil
 	}
+
+	r.Spans = r.Spans[len(ss):]
+	return r, nil
 }
 
 func spansetFromMetaData(meta *SpansetMetadata) Spanset {
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index d5fe4d533fb..a1b67336e6e 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -1043,7 +1043,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			}
 			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
 		case columnPathSpanKind:
-			// if we're acutally retrieving kind then keep it, otherwise it's just being used to count spans and we should not
+			// if we're actually retrieving kind then keep it, otherwise it's just being used to count spans and we should not
 			//  include it in our attributes
 			if !c.kindAsCount {
 				span.Attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 071c5db9c78..43153147be9 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -85,7 +85,7 @@ type spanMetaCollector struct {
 var _ parquetquery.GroupPredicate = (*spanMetaCollector)(nil)
 
 func (c *spanMetaCollector) String() string {
-	return fmt.Sprintf("spanMetaCollector()")
+	return "spanMetaCollector()"
 }
 
 func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {

From bc9e2475ba24f880e10bf87c07cff711cf748c42 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 21 Feb 2023 13:37:15 -0500
Subject: [PATCH 13/27] fix test

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/wal/wal_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tempodb/wal/wal_test.go b/tempodb/wal/wal_test.go
index ec8bca4f02a..c41fbe1b5dd 100644
--- a/tempodb/wal/wal_test.go
+++ b/tempodb/wal/wal_test.go
@@ -298,7 +298,7 @@ func testFetch(t *testing.T, e encoding.VersionedEncoding) {
 
 			// confirm traceid matches
 			expectedID := ids[i]
-			require.Len(t, ss, 1)
+			require.NotNil(t, ss)
 			require.Equal(t, ss.TraceID, expectedID)
 
 			// confirm no more matches

From 632cdce0a978ebaa94bb7c6746fb7da1f3b26779 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 22 Feb 2023 09:22:07 -0500
Subject: [PATCH 14/27] fixed/improved bench

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql_test.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql_test.go b/tempodb/encoding/vparquet/block_traceql_test.go
index 1c6004f1056..287be9a15e9 100644
--- a/tempodb/encoding/vparquet/block_traceql_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_test.go
@@ -445,8 +445,8 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		{"spanAttNameNoMatch", traceql.MustExtractFetchSpansRequest("{ span.foo = `bar` }")},
 		{"spanAttValNoMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom = `bar` }")},
 		{"spanAttValMatch", traceql.MustExtractFetchSpansRequest("{ span.bloom > 0 }")},
-		{"spanAttIntrinsicNoMatch", traceql.MustExtractFetchSpansRequest("{ span.name = `asdfasdf` }")},
-		{"spanAttIntrinsicMatch", traceql.MustExtractFetchSpansRequest("{ span.name = `gcs.ReadRange` }")},
+		{"spanAttIntrinsicNoMatch", traceql.MustExtractFetchSpansRequest("{ name = `asdfasdf` }")},
+		{"spanAttIntrinsicMatch", traceql.MustExtractFetchSpansRequest("{ name = `gcs.ReadRange` }")},
 
 		// resource
 		{"resourceAttNameNoMatch", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` }")},
@@ -458,7 +458,9 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
 		// mixed
 		{"mixedNameNoMatch", traceql.MustExtractFetchSpansRequest("{ .foo = `bar` }")},
 		{"mixedValNoMatch", traceql.MustExtractFetchSpansRequest("{ .bloom = `bar` }")},
-		{"mixedValMixedMatch", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` && span.name = `gcs.ReadRange` }")},
+		{"mixedValMixedMatchAnd", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` && name = `gcs.ReadRange` }")},
+		{"mixedValMixedMatchOr", traceql.MustExtractFetchSpansRequest("{ resource.foo = `bar` || name = `gcs.ReadRange` }")},
+		{"mixedValBothMatch", traceql.MustExtractFetchSpansRequest("{ resource.service.name = `query-frontend` && name = `gcs.ReadRange` }")},
 	}
 
 	ctx := context.TODO()

From ca8a8af85daabacdd67aa285de39fa27fd77fc89 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 22 Feb 2023 10:54:41 -0500
Subject: [PATCH 15/27] span -> *span

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/ast.go                         |  4 +--
 pkg/traceql/ast_execute.go                 | 12 ++++----
 pkg/traceql/ast_execute_test.go            | 28 +++++++++----------
 pkg/traceql/ast_test.go                    | 32 +++++++++++-----------
 pkg/traceql/engine_test.go                 |  2 +-
 pkg/traceql/storage.go                     |  2 +-
 tempodb/encoding/vparquet/block_traceql.go | 11 ++++----
 7 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/pkg/traceql/ast.go b/pkg/traceql/ast.go
index 76e86adaabb..d82c3c5aa18 100644
--- a/pkg/traceql/ast.go
+++ b/pkg/traceql/ast.go
@@ -265,7 +265,7 @@ func (f SpansetFilter) evaluate(input []Spanset) ([]Spanset, error) {
 			continue
 		}
 
-		var matchingSpans []Span
+		var matchingSpans []*Span
 		for _, s := range ss.Spans {
 			result, err := f.Expression.execute(s)
 			if err != nil {
@@ -330,7 +330,7 @@ type FieldExpression interface {
 	__fieldExpression()
 
 	extractConditions(request *FetchSpansRequest)
-	execute(span Span) (Static, error)
+	execute(span *Span) (Static, error)
 }
 
 type BinaryOperation struct {
diff --git a/pkg/traceql/ast_execute.go b/pkg/traceql/ast_execute.go
index 51ec2086cd4..b25a003a4b4 100644
--- a/pkg/traceql/ast_execute.go
+++ b/pkg/traceql/ast_execute.go
@@ -10,7 +10,7 @@ import (
 	"github.com/grafana/tempo/pkg/util/log"
 )
 
-func appendSpans(buffer []Span, input []Spanset) []Span {
+func appendSpans(buffer []*Span, input []Spanset) []*Span {
 	for _, i := range input {
 		buffer = append(buffer, i.Spans...)
 	}
@@ -92,7 +92,7 @@ func (f ScalarFilter) evaluate(input []Spanset) (output []Spanset, err error) {
 	return output, nil
 }
 
-func (f SpansetFilter) matches(span Span) (bool, error) {
+func (f SpansetFilter) matches(span *Span) (bool, error) {
 	static, err := f.Expression.execute(span)
 	if err != nil {
 		level.Debug(log.Logger).Log("msg", "SpanSetFilter.matches failed", "err", err)
@@ -139,7 +139,7 @@ func (a Aggregate) evaluate(input []Spanset) (output []Spanset, err error) {
 	return output, nil
 }
 
-func (o BinaryOperation) execute(span Span) (Static, error) {
+func (o BinaryOperation) execute(span *Span) (Static, error) {
 	lhs, err := o.LHS.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
@@ -231,7 +231,7 @@ func binOp(op Operator, lhs, rhs Static) (bool, error) {
 	return false, errors.New("unexpected operator " + op.String())
 }
 
-func (o UnaryOperation) execute(span Span) (Static, error) {
+func (o UnaryOperation) execute(span *Span) (Static, error) {
 	static, err := o.Expression.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
@@ -260,11 +260,11 @@ func (o UnaryOperation) execute(span Span) (Static, error) {
 	return NewStaticNil(), errors.New("UnaryOperation has Op different from Not and Sub")
 }
 
-func (s Static) execute(span Span) (Static, error) {
+func (s Static) execute(span *Span) (Static, error) {
 	return s, nil
 }
 
-func (a Attribute) execute(span Span) (Static, error) {
+func (a Attribute) execute(span *Span) (Static, error) {
 	static, ok := span.Attributes[a]
 	if ok {
 		return static, nil
diff --git a/pkg/traceql/ast_execute_test.go b/pkg/traceql/ast_execute_test.go
index 7fd056ca990..478f49235d7 100644
--- a/pkg/traceql/ast_execute_test.go
+++ b/pkg/traceql/ast_execute_test.go
@@ -132,7 +132,7 @@ func TestSpansetFilter_matches(t *testing.T) {
 
 			spansetFilter := expr.Pipeline.Elements[0].(SpansetFilter)
 
-			matches, err := spansetFilter.matches(tt.span)
+			matches, err := spansetFilter.matches(&tt.span)
 
 			if tt.err {
 				fmt.Println(err)
@@ -155,18 +155,18 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 		{
 			"{ .foo = `a` } && { .foo = `b` }",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// This spanset will be kept because it satisfies both conditions
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// This spanset will be dropped
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
@@ -175,22 +175,22 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 		{
 			"{ .foo = `a` } || { .foo = `b` }",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Second span will be dropped
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
 				}},
 			},
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
@@ -220,12 +220,12 @@ func TestScalarFilterEvaluate(t *testing.T) {
 		{
 			"{ .foo = `a` } | count() > 1",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// This has 1 match
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// This has 2 matches
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
@@ -234,7 +234,7 @@ func TestScalarFilterEvaluate(t *testing.T) {
 			[]Spanset{
 				{
 					Scalar: NewStaticInt(2),
-					Spans: []Span{
+					Spans: []*Span{
 						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					},
@@ -244,7 +244,7 @@ func TestScalarFilterEvaluate(t *testing.T) {
 		{
 			"{ .foo = `a` } | avg(duration) >= 10ms",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Avg duration = 5ms
 					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
@@ -255,7 +255,7 @@ func TestScalarFilterEvaluate(t *testing.T) {
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(8 * time.Millisecond)},
 					},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Avg duration = 10ms
 					{Attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
@@ -272,7 +272,7 @@ func TestScalarFilterEvaluate(t *testing.T) {
 					// TODO - Type handling of aggregate output could use some improvement.
 					// avg(duration) should probably return a Duration instead of a float.
 					Scalar: NewStaticFloat(10.0 * float64(time.Millisecond)),
-					Spans: []Span{
+					Spans: []*Span{
 						{Attributes: map[Attribute]Static{
 							NewAttribute("foo"):             NewStaticString("a"),
 							NewIntrinsic(IntrinsicDuration): NewStaticDuration(5 * time.Millisecond)},
diff --git a/pkg/traceql/ast_test.go b/pkg/traceql/ast_test.go
index 9075e1ac4a1..fd99ec94234 100644
--- a/pkg/traceql/ast_test.go
+++ b/pkg/traceql/ast_test.go
@@ -109,30 +109,30 @@ func TestPipelineEvaluate(t *testing.T) {
 		{
 			"{ true } | { true } | { true }",
 			[]Spanset{
-				{Spans: []Span{{}}},
+				{Spans: []*Span{{}}},
 			},
 			[]Spanset{
-				{Spans: []Span{{}}},
+				{Spans: []*Span{{}}},
 			},
 		},
 		{
 			"{ true } | { false } | { true }",
 			[]Spanset{
-				{Spans: []Span{{}}},
+				{Spans: []*Span{{}}},
 			},
 			[]Spanset{},
 		},
 		{
 			"{ .foo1 = `a` } | { .foo2 = `b` }",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// First span should be dropped here
 					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}}}},
 			},
 		},
@@ -159,55 +159,55 @@ func TestSpansetFilterEvaluate(t *testing.T) {
 			"{ true }",
 			[]Spanset{
 				// Empty spanset is dropped
-				{Spans: []Span{}},
-				{Spans: []Span{{}}},
+				{Spans: []*Span{}},
+				{Spans: []*Span{{}}},
 			},
 			[]Spanset{
-				{Spans: []Span{{}}},
+				{Spans: []*Span{{}}},
 			},
 		},
 		{
 			"{ .foo = `a` }",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Second span should be dropped here
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// This entire spanset will be dropped
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}}}},
 			},
 		},
 		{
 			"{ .foo = 1 || (.foo >= 4 && .foo < 6) }",
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Second span should be dropped here
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2)}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// First span should be dropped here
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(3)}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					// Entire spanset should be dropped
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(6)}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(7)}},
 				}},
 			},
 			[]Spanset{
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}}}},
-				{Spans: []Span{
+				{Spans: []*Span{
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
 					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index 199fcd491b8..30549a19098 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -295,7 +295,7 @@ func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error
 func spansetFromMetaData(meta *SpansetMetadata) Spanset {
 	ret := Spanset{}
 	for _, s := range meta.Spans {
-		s := Span{
+		s := &Span{
 			Attributes: s.Attributes,
 		}
 		ret.Spans = append(ret.Spans, s)
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index 38793ac0aaf..ba7f1f18def 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -61,7 +61,7 @@ type Span struct {
 
 type Spanset struct {
 	Scalar Static
-	Spans  []Span
+	Spans  []*Span
 }
 
 type SpanMetadata struct {
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index a1b67336e6e..0cc2babb790 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -226,7 +226,7 @@ func (i *spansetIterator) Next() (*traceql.Span, error) {
 		for _, ss := range filteredSpansets {
 			for _, s := range ss.Spans {
 				span := s
-				i.currentSpans = append(i.currentSpans, &span)
+				i.currentSpans = append(i.currentSpans, span)
 			}
 		}
 
@@ -1003,6 +1003,7 @@ func (c *spanCollector) String() string {
 
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
+	// TODO: this allocates a lot of memory, we should look into span pooling
 	span := &traceql.Span{
 		Attributes: make(map[traceql.Attribute]traceql.Static),
 		RowNum:     res.RowNumber,
@@ -1116,11 +1117,11 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// and filter out spans that didn't match anything.
 
 	resAttrs := make(map[traceql.Attribute]traceql.Static)
-	spans := make([]traceql.Span, 0, len(res.OtherEntries))
+	spans := make([]*traceql.Span, 0, len(res.OtherEntries))
 
 	for _, kv := range res.OtherEntries {
 		if span, ok := kv.Value.(*traceql.Span); ok {
-			spans = append(spans, *span)
+			spans = append(spans, span)
 			continue
 		}
 
@@ -1167,9 +1168,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		}
 	}
 
-	sp := &traceql.Spanset{
-		Spans: make([]traceql.Span, 0, len(spans)),
-	}
+	sp := &traceql.Spanset{}
 
 	// Copy over only spans that met minimum criteria
 	if c.requireAtLeastOneMatchOverall {

From 09ac676284f22914169af2b6dc7ce86bff00e923 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 22 Feb 2023 14:08:05 -0500
Subject: [PATCH 16/27] pool spans between span and batch collectors

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go | 39 +++++++++++++++++++---
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 0cc2babb790..667989449ac 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"math"
 	"reflect"
+	"sync"
 	"time"
 
 	"github.com/pkg/errors"
@@ -16,6 +17,34 @@ import (
 	"github.com/grafana/tempo/tempodb/encoding/common"
 )
 
+// todo: this sync pool currently just massively reduces allocations for queries like { .foo = "bar" } where millions of spans
+// are created in the spanCollector and then thrown away in the batchCollector. this could be extended to catch other times
+// we drop spans.
+var spanPool = sync.Pool{
+	New: func() interface{} {
+		return &traceql.Span{
+			Attributes: make(map[traceql.Attribute]traceql.Static),
+		}
+	},
+}
+
+func putSpan(s *traceql.Span) {
+	s.EndtimeUnixNanos = 0
+	s.StartTimeUnixNanos = 0
+	s.RowNum = parquetquery.EmptyRowNumber()
+
+	// clear attributes
+	for k := range s.Attributes {
+		delete(s.Attributes, k)
+	}
+
+	spanPool.Put(s)
+}
+
+func getSpan() *traceql.Span {
+	return spanPool.Get().(*traceql.Span)
+}
+
 // Helper function to create an iterator, that abstracts away
 // context like file and rowgroups.
 type makeIterFn func(columnName string, predicate parquetquery.Predicate, selectAs string) parquetquery.Iterator
@@ -1001,13 +1030,13 @@ func (c *spanCollector) String() string {
 	return fmt.Sprintf("spanCollector(%d, %v)", c.minAttributes, c.durationFilters)
 }
 
+var spanCreated = 0
+
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	// TODO: this allocates a lot of memory, we should look into span pooling
-	span := &traceql.Span{
-		Attributes: make(map[traceql.Attribute]traceql.Static),
-		RowNum:     res.RowNumber,
-	}
+	span := getSpan()
+	span.RowNum = res.RowNumber
 
 	for _, e := range res.OtherEntries {
 		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
@@ -1175,7 +1204,9 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		for _, span := range spans {
 			if len(span.Attributes) > 0 {
 				sp.Spans = append(sp.Spans, span)
+				continue
 			}
+			putSpan(span)
 		}
 	} else {
 		sp.Spans = spans

From 26cd3651f8b3bd824069d2439485b0878e44a0ee Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 22 Feb 2023 14:33:46 -0500
Subject: [PATCH 17/27] use shared span slice and lazily creaetd spanset

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go | 33 +++++++++++++---------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 667989449ac..db9b4964728 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -655,8 +655,8 @@ func createResourceIterator(makeIter makeIterFn, spanIterator parquetquery.Itera
 		minCount = len(distinct)
 	}
 	batchCol := &batchCollector{
-		requireAtLeastOneMatchOverall,
-		minCount,
+		requireAtLeastOneMatchOverall: requireAtLeastOneMatchOverall,
+		minAttributes:                 minCount,
 	}
 
 	var required []parquetquery.Iterator
@@ -1129,6 +1129,10 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 type batchCollector struct {
 	requireAtLeastOneMatchOverall bool
 	minAttributes                 int
+
+	// shared static spans used in KeepGroup. done for memory savings, but won't
+	// work if the batchCollector is accessed concurrently
+	keepGroupSpans []*traceql.Span
 }
 
 var _ parquetquery.GroupPredicate = (*batchCollector)(nil)
@@ -1144,13 +1148,12 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// flattens it into 1 spanset per trace.  All we really need
 	// todo is merge the resource-level attributes onto the spans
 	// and filter out spans that didn't match anything.
+	c.keepGroupSpans = c.keepGroupSpans[:0]
 
 	resAttrs := make(map[traceql.Attribute]traceql.Static)
-	spans := make([]*traceql.Span, 0, len(res.OtherEntries))
-
 	for _, kv := range res.OtherEntries {
 		if span, ok := kv.Value.(*traceql.Span); ok {
-			spans = append(spans, span)
+			c.keepGroupSpans = append(c.keepGroupSpans, span)
 			continue
 		}
 
@@ -1159,7 +1162,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Throw out batches without any spans
-	if len(spans) == 0 {
+	if len(c.keepGroupSpans) == 0 {
 		return false
 	}
 
@@ -1181,7 +1184,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	// Copy resource-level attributes to the individual spans now
 	for k, v := range resAttrs {
-		for _, span := range spans {
+		for _, span := range c.keepGroupSpans {
 			if _, alreadyExists := span.Attributes[k]; !alreadyExists {
 				span.Attributes[k] = v
 			}
@@ -1189,7 +1192,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Remove unmatched attributes
-	for _, span := range spans {
+	for _, span := range c.keepGroupSpans {
 		for k, v := range span.Attributes {
 			if v.Type == traceql.TypeNil {
 				delete(span.Attributes, k)
@@ -1197,26 +1200,30 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		}
 	}
 
-	sp := &traceql.Spanset{}
+	var filteredSpans []*traceql.Span
 
 	// Copy over only spans that met minimum criteria
 	if c.requireAtLeastOneMatchOverall {
-		for _, span := range spans {
+		for _, span := range c.keepGroupSpans {
 			if len(span.Attributes) > 0 {
-				sp.Spans = append(sp.Spans, span)
+				filteredSpans = append(filteredSpans, span)
 				continue
 			}
 			putSpan(span)
 		}
 	} else {
-		sp.Spans = spans
+		filteredSpans = c.keepGroupSpans
 	}
 
 	// Throw out batches without any spans
-	if len(sp.Spans) == 0 {
+	if len(filteredSpans) == 0 {
 		return false
 	}
 
+	sp := &traceql.Spanset{
+		Spans: filteredSpans,
+	}
+
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
 	res.AppendOtherValue(otherEntrySpansetKey, sp)

From 0ab4eeafbf3c6d78b9684bed049352d990f21376 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Wed, 22 Feb 2023 16:43:27 -0500
Subject: [PATCH 18/27] fix

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index db9b4964728..03fa78457e1 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -1030,11 +1030,7 @@ func (c *spanCollector) String() string {
 	return fmt.Sprintf("spanCollector(%d, %v)", c.minAttributes, c.durationFilters)
 }
 
-var spanCreated = 0
-
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
-
-	// TODO: this allocates a lot of memory, we should look into span pooling
 	span := getSpan()
 	span.RowNum = res.RowNumber
 
@@ -1142,7 +1138,6 @@ func (c *batchCollector) String() string {
 }
 
 func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
-
 	// TODO - This wraps everything up in a spanset per batch.
 	// We probably don't need to do this, since the traceCollector
 	// flattens it into 1 spanset per trace.  All we really need
@@ -1212,7 +1207,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			putSpan(span)
 		}
 	} else {
-		filteredSpans = c.keepGroupSpans
+		filteredSpans = append([]*traceql.Span(nil), c.keepGroupSpans...)
 	}
 
 	// Throw out batches without any spans

From 7da5b883e926ad6589c1a3aa14f673923ffc9c8a Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Thu, 23 Feb 2023 08:31:21 -0500
Subject: [PATCH 19/27] more putSpans

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go      | 11 ++++++++---
 tempodb/encoding/vparquet/block_traceql_meta.go |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 03fa78457e1..7f6c569e69f 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -17,9 +17,13 @@ import (
 	"github.com/grafana/tempo/tempodb/encoding/common"
 )
 
-// todo: this sync pool currently just massively reduces allocations for queries like { .foo = "bar" } where millions of spans
-// are created in the spanCollector and then thrown away in the batchCollector. this could be extended to catch other times
-// we drop spans.
+// todo: this sync pool currently massively reduces allocations by pooling spans for certain queries.
+// it currently catches spans discarded:
+// - in the span collector
+// - in the batch collector
+// - while converting to spanmeta
+// to be fully effective it needs to catch spans thrown away in the query engine. perhaps filter spans
+// can return a slice of dropped and kept spansets?
 var spanPool = sync.Pool{
 	New: func() interface{} {
 		return &traceql.Span{
@@ -1109,6 +1113,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			}
 		}
 		if count < c.minAttributes {
+			putSpan(span)
 			return false
 		}
 	}
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 43153147be9..290939b934f 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -101,6 +101,7 @@ func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		EndtimeUnixNanos:   span.EndtimeUnixNanos,
 		Attributes:         span.Attributes,
 	}
+	putSpan(span)
 
 	// span start/end time may come from span attributes or it may come from
 	// the iterator results. if we find it in the iterator results, use that

From c9bac950a3c4187117e7dc54ae2528cbc745adab Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Thu, 23 Feb 2023 09:32:33 -0500
Subject: [PATCH 20/27] overwrite atts

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql_meta.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 290939b934f..9ace9ae33e5 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -101,6 +101,7 @@ func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		EndtimeUnixNanos:   span.EndtimeUnixNanos,
 		Attributes:         span.Attributes,
 	}
+	span.Attributes = make(map[traceql.Attribute]traceql.Static) // we have to overwrite the attributes b/c putSpan will attempt to reuse them
 	putSpan(span)
 
 	// span start/end time may come from span attributes or it may come from

From beb06dfc78e8adf756e51d447268d4763bf1b334 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 27 Feb 2023 12:36:11 -0500
Subject: [PATCH 21/27] Change traceql.Span to be an interface

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/ast.go                            |  22 +--
 pkg/traceql/ast_execute.go                    |  25 +--
 pkg/traceql/ast_execute_test.go               | 144 +++++++++---------
 pkg/traceql/ast_test.go                       | 120 +++++++++------
 pkg/traceql/engine.go                         |  18 ++-
 pkg/traceql/engine_test.go                    |  69 ++++-----
 pkg/traceql/storage.go                        |  34 ++---
 tempodb/encoding/vparquet/block_traceql.go    | 104 ++++++++-----
 .../encoding/vparquet/block_traceql_meta.go   |  30 ++--
 .../vparquet/block_traceql_meta_test.go       | 109 +++++++------
 10 files changed, 352 insertions(+), 323 deletions(-)

diff --git a/pkg/traceql/ast.go b/pkg/traceql/ast.go
index d82c3c5aa18..731ed2339ab 100644
--- a/pkg/traceql/ast.go
+++ b/pkg/traceql/ast.go
@@ -14,7 +14,7 @@ type Element interface {
 type pipelineElement interface {
 	Element
 	extractConditions(request *FetchSpansRequest)
-	evaluate([]Spanset) ([]Spanset, error)
+	evaluate([]*Spanset) ([]*Spanset, error)
 }
 
 type typedExpression interface {
@@ -85,7 +85,7 @@ func (p Pipeline) extractConditions(req *FetchSpansRequest) {
 	}
 }
 
-func (p Pipeline) evaluate(input []Spanset) (result []Spanset, err error) {
+func (p Pipeline) evaluate(input []*Spanset) (result []*Spanset, err error) {
 	result = input
 
 	for _, element := range p.Elements {
@@ -95,7 +95,7 @@ func (p Pipeline) evaluate(input []Spanset) (result []Spanset, err error) {
 		}
 
 		if len(result) == 0 {
-			return []Spanset{}, nil
+			return []*Spanset{}, nil
 		}
 	}
 
@@ -116,7 +116,7 @@ func (o GroupOperation) extractConditions(request *FetchSpansRequest) {
 	o.Expression.extractConditions(request)
 }
 
-func (GroupOperation) evaluate(ss []Spanset) ([]Spanset, error) {
+func (GroupOperation) evaluate(ss []*Spanset) ([]*Spanset, error) {
 	return ss, nil
 }
 
@@ -130,7 +130,7 @@ func newCoalesceOperation() CoalesceOperation {
 func (o CoalesceOperation) extractConditions(request *FetchSpansRequest) {
 }
 
-func (CoalesceOperation) evaluate(ss []Spanset) ([]Spanset, error) {
+func (CoalesceOperation) evaluate(ss []*Spanset) ([]*Spanset, error) {
 	return ss, nil
 }
 
@@ -257,15 +257,15 @@ func newSpansetFilter(e FieldExpression) SpansetFilter {
 // nolint: revive
 func (SpansetFilter) __spansetExpression() {}
 
-func (f SpansetFilter) evaluate(input []Spanset) ([]Spanset, error) {
-	var output []Spanset
+func (f SpansetFilter) evaluate(input []*Spanset) ([]*Spanset, error) {
+	var output []*Spanset
 
 	for _, ss := range input {
 		if len(ss.Spans) == 0 {
 			continue
 		}
 
-		var matchingSpans []*Span
+		var matchingSpans []Span
 		for _, s := range ss.Spans {
 			result, err := f.Expression.execute(s)
 			if err != nil {
@@ -287,9 +287,9 @@ func (f SpansetFilter) evaluate(input []Spanset) ([]Spanset, error) {
 			continue
 		}
 
-		matchingSpanset := ss
+		matchingSpanset := *ss
 		matchingSpanset.Spans = matchingSpans
-		output = append(output, matchingSpanset)
+		output = append(output, &matchingSpanset)
 	}
 
 	return output, nil
@@ -330,7 +330,7 @@ type FieldExpression interface {
 	__fieldExpression()
 
 	extractConditions(request *FetchSpansRequest)
-	execute(span *Span) (Static, error)
+	execute(span Span) (Static, error)
 }
 
 type BinaryOperation struct {
diff --git a/pkg/traceql/ast_execute.go b/pkg/traceql/ast_execute.go
index b25a003a4b4..617c4990ebd 100644
--- a/pkg/traceql/ast_execute.go
+++ b/pkg/traceql/ast_execute.go
@@ -10,14 +10,14 @@ import (
 	"github.com/grafana/tempo/pkg/util/log"
 )
 
-func appendSpans(buffer []*Span, input []Spanset) []*Span {
+func appendSpans(buffer []Span, input []*Spanset) []Span {
 	for _, i := range input {
 		buffer = append(buffer, i.Spans...)
 	}
 	return buffer
 }
 
-func (o SpansetOperation) evaluate(input []Spanset) (output []Spanset, err error) {
+func (o SpansetOperation) evaluate(input []*Spanset) (output []*Spanset, err error) {
 
 	for i := range input {
 		curr := input[i : i+1]
@@ -57,7 +57,7 @@ func (o SpansetOperation) evaluate(input []Spanset) (output []Spanset, err error
 	return output, nil
 }
 
-func (f ScalarFilter) evaluate(input []Spanset) (output []Spanset, err error) {
+func (f ScalarFilter) evaluate(input []*Spanset) (output []*Spanset, err error) {
 
 	// TODO we solve this gap where pipeline elements and scalar binary
 	// operations meet in a generic way. For now we only support well-defined
@@ -92,7 +92,7 @@ func (f ScalarFilter) evaluate(input []Spanset) (output []Spanset, err error) {
 	return output, nil
 }
 
-func (f SpansetFilter) matches(span *Span) (bool, error) {
+func (f SpansetFilter) matches(span Span) (bool, error) {
 	static, err := f.Expression.execute(span)
 	if err != nil {
 		level.Debug(log.Logger).Log("msg", "SpanSetFilter.matches failed", "err", err)
@@ -105,7 +105,7 @@ func (f SpansetFilter) matches(span *Span) (bool, error) {
 	return static.B, nil
 }
 
-func (a Aggregate) evaluate(input []Spanset) (output []Spanset, err error) {
+func (a Aggregate) evaluate(input []*Spanset) (output []*Spanset, err error) {
 
 	for _, ss := range input {
 		switch a.op {
@@ -139,7 +139,7 @@ func (a Aggregate) evaluate(input []Spanset) (output []Spanset, err error) {
 	return output, nil
 }
 
-func (o BinaryOperation) execute(span *Span) (Static, error) {
+func (o BinaryOperation) execute(span Span) (Static, error) {
 	lhs, err := o.LHS.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
@@ -231,7 +231,7 @@ func binOp(op Operator, lhs, rhs Static) (bool, error) {
 	return false, errors.New("unexpected operator " + op.String())
 }
 
-func (o UnaryOperation) execute(span *Span) (Static, error) {
+func (o UnaryOperation) execute(span Span) (Static, error) {
 	static, err := o.Expression.execute(span)
 	if err != nil {
 		return NewStaticNil(), err
@@ -260,23 +260,24 @@ func (o UnaryOperation) execute(span *Span) (Static, error) {
 	return NewStaticNil(), errors.New("UnaryOperation has Op different from Not and Sub")
 }
 
-func (s Static) execute(span *Span) (Static, error) {
+func (s Static) execute(span Span) (Static, error) {
 	return s, nil
 }
 
-func (a Attribute) execute(span *Span) (Static, error) {
-	static, ok := span.Attributes[a]
+func (a Attribute) execute(span Span) (Static, error) {
+	atts := span.Attributes()
+	static, ok := atts[a]
 	if ok {
 		return static, nil
 	}
 
 	if a.Scope == AttributeScopeNone {
-		for attribute, static := range span.Attributes {
+		for attribute, static := range atts {
 			if a.Name == attribute.Name && attribute.Scope == AttributeScopeSpan {
 				return static, nil
 			}
 		}
-		for attribute, static := range span.Attributes {
+		for attribute, static := range atts {
 			if a.Name == attribute.Name {
 				return static, nil
 			}
diff --git a/pkg/traceql/ast_execute_test.go b/pkg/traceql/ast_execute_test.go
index 478f49235d7..088da16718d 100644
--- a/pkg/traceql/ast_execute_test.go
+++ b/pkg/traceql/ast_execute_test.go
@@ -19,15 +19,15 @@ func TestSpansetFilter_matches(t *testing.T) {
 	}{
 		{
 			query: `{ ("foo" != "bar") && !("foo" = "bar") }`,
-			span: Span{
-				Attributes: nil,
+			span: &mockSpan{
+				attributes: nil,
 			},
 			matches: true,
 		},
 		{
 			query: `{ .foo = .bar }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("foo"): NewStaticString("bzz"),
 					NewAttribute("bar"): NewStaticString("bzz"),
 				},
@@ -37,8 +37,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		{
 			// Missing attribute
 			query: `{ .foo = "bar" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("fzz"): NewStaticString("bar"),
 				},
 			},
@@ -46,8 +46,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ .foo = .bar }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("foo"): NewStaticString("str"),
 					NewAttribute("bar"): NewStaticInt(5),
 				},
@@ -57,8 +57,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		{
 			// Types don't match with operator
 			query: `{ .foo =~ .bar }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("foo"): NewStaticInt(3),
 					NewAttribute("bar"): NewStaticInt(5),
 				},
@@ -67,8 +67,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ .field1 =~ "hello w.*" && .field2 !~ "bye b.*" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("field1"): NewStaticString("hello world"),
 					NewAttribute("field2"): NewStaticString("bye world"),
 				},
@@ -77,8 +77,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ .foo > 2 && .foo >= 3.5 && .foo < 5 && .foo <= 3.5 && .duration > 1800ms }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewAttribute("foo"):      NewStaticFloat(3.5),
 					NewAttribute("duration"): NewStaticDuration(2 * time.Second),
 				},
@@ -87,8 +87,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ .foo = "scope_span" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewScopedAttribute(AttributeScopeSpan, false, "foo"):     NewStaticString("scope_span"),
 					NewScopedAttribute(AttributeScopeResource, false, "foo"): NewStaticString("scope_resource"),
 				},
@@ -97,8 +97,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ .foo = "scope_resource" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewScopedAttribute(AttributeScopeResource, false, "foo"): NewStaticString("scope_resource"),
 				},
 			},
@@ -106,8 +106,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ span.foo = "scope_span" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewScopedAttribute(AttributeScopeSpan, false, "foo"):     NewStaticString("scope_span"),
 					NewScopedAttribute(AttributeScopeResource, false, "foo"): NewStaticString("scope_resource"),
 				},
@@ -116,8 +116,8 @@ func TestSpansetFilter_matches(t *testing.T) {
 		},
 		{
 			query: `{ resource.foo = "scope_resource" }`,
-			span: Span{
-				Attributes: map[Attribute]Static{
+			span: &mockSpan{
+				attributes: map[Attribute]Static{
 					NewScopedAttribute(AttributeScopeSpan, false, "foo"):     NewStaticString("scope_span"),
 					NewScopedAttribute(AttributeScopeResource, false, "foo"): NewStaticString("scope_resource"),
 				},
@@ -132,7 +132,7 @@ func TestSpansetFilter_matches(t *testing.T) {
 
 			spansetFilter := expr.Pipeline.Elements[0].(SpansetFilter)
 
-			matches, err := spansetFilter.matches(&tt.span)
+			matches, err := spansetFilter.matches(tt.span)
 
 			if tt.err {
 				fmt.Println(err)
@@ -149,49 +149,49 @@ func TestSpansetFilter_matches(t *testing.T) {
 func TestSpansetOperationEvaluate(t *testing.T) {
 	testCases := []struct {
 		query  string
-		input  []Spanset
-		output []Spanset
+		input  []*Spanset
+		output []*Spanset
 	}{
 		{
 			"{ .foo = `a` } && { .foo = `b` }",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// This spanset will be kept because it satisfies both conditions
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// This spanset will be dropped
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},
 		{
 			"{ .foo = `a` } || { .foo = `b` }",
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// Second span will be dropped
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
 				}},
 			},
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},
@@ -214,70 +214,70 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 func TestScalarFilterEvaluate(t *testing.T) {
 	testCases := []struct {
 		query  string
-		input  []Spanset
-		output []Spanset
+		input  []*Spanset
+		output []*Spanset
 	}{
 		{
 			"{ .foo = `a` } | count() > 1",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// This has 1 match
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// This has 2 matches
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Scalar: NewStaticInt(2),
-					Spans: []*Span{
-						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-						{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					Spans: []Span{
+						&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+						&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
 					},
 				},
 			},
 		},
 		{
 			"{ .foo = `a` } | avg(duration) >= 10ms",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// Avg duration = 5ms
-					{Attributes: map[Attribute]Static{
+					&mockSpan{attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(2 * time.Millisecond)},
 					},
-					{Attributes: map[Attribute]Static{
+					&mockSpan{attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(8 * time.Millisecond)},
 					},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// Avg duration = 10ms
-					{Attributes: map[Attribute]Static{
+					&mockSpan{attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(5 * time.Millisecond)},
 					},
-					{Attributes: map[Attribute]Static{
+					&mockSpan{attributes: map[Attribute]Static{
 						NewAttribute("foo"):             NewStaticString("a"),
 						NewIntrinsic(IntrinsicDuration): NewStaticDuration(15 * time.Millisecond)},
 					},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					// TODO - Type handling of aggregate output could use some improvement.
 					// avg(duration) should probably return a Duration instead of a float.
 					Scalar: NewStaticFloat(10.0 * float64(time.Millisecond)),
-					Spans: []*Span{
-						{Attributes: map[Attribute]Static{
+					Spans: []Span{
+						&mockSpan{attributes: map[Attribute]Static{
 							NewAttribute("foo"):             NewStaticString("a"),
 							NewIntrinsic(IntrinsicDuration): NewStaticDuration(5 * time.Millisecond)},
 						},
-						{Attributes: map[Attribute]Static{
+						&mockSpan{attributes: map[Attribute]Static{
 							NewAttribute("foo"):             NewStaticString("a"),
 							NewIntrinsic(IntrinsicDuration): NewStaticDuration(15 * time.Millisecond)},
 						},
diff --git a/pkg/traceql/ast_test.go b/pkg/traceql/ast_test.go
index fd99ec94234..9119aeda237 100644
--- a/pkg/traceql/ast_test.go
+++ b/pkg/traceql/ast_test.go
@@ -103,37 +103,37 @@ func TestPipelineExtractConditions(t *testing.T) {
 func TestPipelineEvaluate(t *testing.T) {
 	testCases := []struct {
 		query  string
-		input  []Spanset
-		output []Spanset
+		input  []*Spanset
+		output []*Spanset
 	}{
 		{
 			"{ true } | { true } | { true }",
-			[]Spanset{
-				{Spans: []*Span{{}}},
+			[]*Spanset{
+				{Spans: []Span{&mockSpan{}}},
 			},
-			[]Spanset{
-				{Spans: []*Span{{}}},
+			[]*Spanset{
+				{Spans: []Span{&mockSpan{}}},
 			},
 		},
 		{
 			"{ true } | { false } | { true }",
-			[]Spanset{
-				{Spans: []*Span{{}}},
+			[]*Spanset{
+				{Spans: []Span{&mockSpan{}}},
 			},
-			[]Spanset{},
+			[]*Spanset{},
 		},
 		{
 			"{ .foo1 = `a` } | { .foo2 = `b` }",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// First span should be dropped here
-					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}},
 				}},
 			},
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}}}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo1"): NewStaticString("a"), NewAttribute("foo2"): NewStaticString("b")}}}},
 			},
 		},
 	}
@@ -152,64 +152,64 @@ func TestPipelineEvaluate(t *testing.T) {
 func TestSpansetFilterEvaluate(t *testing.T) {
 	testCases := []struct {
 		query  string
-		input  []Spanset
-		output []Spanset
+		input  []*Spanset
+		output []*Spanset
 	}{
 		{
 			"{ true }",
-			[]Spanset{
+			[]*Spanset{
 				// Empty spanset is dropped
-				{Spans: []*Span{}},
-				{Spans: []*Span{{}}},
+				{Spans: []Span{}},
+				{Spans: []Span{&mockSpan{}}},
 			},
-			[]Spanset{
-				{Spans: []*Span{{}}},
+			[]*Spanset{
+				{Spans: []Span{&mockSpan{}}},
 			},
 		},
 		{
 			"{ .foo = `a` }",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// Second span should be dropped here
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// This entire spanset will be dropped
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}}}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}}}},
 			},
 		},
 		{
 			"{ .foo = 1 || (.foo >= 4 && .foo < 6) }",
-			[]Spanset{
-				{Spans: []*Span{
+			[]*Spanset{
+				{Spans: []Span{
 					// Second span should be dropped here
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2)}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// First span should be dropped here
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(3)}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(3)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
-				{Spans: []*Span{
+				{Spans: []Span{
 					// Entire spanset should be dropped
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(6)}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(7)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(6)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(7)}},
 				}},
 			},
-			[]Spanset{
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}}}},
-				{Spans: []*Span{
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
-					{Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
+			[]*Spanset{
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}}}},
+				{Spans: []Span{
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(4)}},
+					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(5)}},
 				}},
 			},
 		},
@@ -228,3 +228,25 @@ func TestSpansetFilterEvaluate(t *testing.T) {
 		})
 	}
 }
+
+var _ Span = (*mockSpan)(nil)
+
+type mockSpan struct {
+	id                 []byte
+	startTimeUnixNanos uint64
+	endTimeUnixNanos   uint64
+	attributes         map[Attribute]Static
+}
+
+func (m *mockSpan) Attributes() map[Attribute]Static {
+	return m.attributes
+}
+func (m *mockSpan) ID() []byte {
+	return m.id
+}
+func (m *mockSpan) StartTimeUnixNanos() uint64 {
+	return m.startTimeUnixNanos
+}
+func (m *mockSpan) EndtimeUnixNanos() uint64 {
+	return m.endTimeUnixNanos
+}
diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index 2eb314bc9fa..4f2003b5907 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -38,12 +38,12 @@ func (e *Engine) Execute(ctx context.Context, searchReq *tempopb.SearchRequest,
 
 	spansetsEvaluated := 0
 	// set up the expression evaluation as a filter to reduce data pulled
-	fetchSpansRequest.Filter = func(inSS Spanset) ([]Spanset, error) {
+	fetchSpansRequest.Filter = func(inSS *Spanset) ([]*Spanset, error) {
 		if len(inSS.Spans) == 0 {
 			return nil, nil
 		}
 
-		evalSS, err := rootExpr.Pipeline.evaluate([]Spanset{inSS})
+		evalSS, err := rootExpr.Pipeline.evaluate([]*Spanset{inSS})
 		if err != nil {
 			span.LogKV("msg", "pipeline.evaluate", "err", err)
 			return nil, err
@@ -121,7 +121,7 @@ func (e *Engine) createFetchSpansRequest(searchReq *tempopb.SearchRequest, pipel
 	return req
 }
 
-func (e *Engine) asTraceSearchMetadata(spanset *SpansetMetadata) *tempopb.TraceSearchMetadata {
+func (e *Engine) asTraceSearchMetadata(spanset *Spanset) *tempopb.TraceSearchMetadata {
 	metadata := &tempopb.TraceSearchMetadata{
 		TraceID:           util.TraceIDToHexString(spanset.TraceID),
 		RootServiceName:   spanset.RootServiceName,
@@ -135,17 +135,19 @@ func (e *Engine) asTraceSearchMetadata(spanset *SpansetMetadata) *tempopb.TraceS
 
 	for _, span := range spanset.Spans {
 		tempopbSpan := &tempopb.Span{
-			SpanID:            util.TraceIDToHexString(span.ID),
-			StartTimeUnixNano: span.StartTimeUnixNanos,
-			DurationNanos:     span.EndtimeUnixNanos - span.StartTimeUnixNanos,
+			SpanID:            util.TraceIDToHexString(span.ID()),
+			StartTimeUnixNano: span.StartTimeUnixNanos(),
+			DurationNanos:     span.EndtimeUnixNanos() - span.StartTimeUnixNanos(),
 			Attributes:        nil,
 		}
 
-		if name, ok := span.Attributes[NewIntrinsic(IntrinsicName)]; ok {
+		atts := span.Attributes()
+
+		if name, ok := atts[NewIntrinsic(IntrinsicName)]; ok {
 			tempopbSpan.Name = name.S
 		}
 
-		for attribute, static := range span.Attributes {
+		for attribute, static := range atts {
 			if attribute.Intrinsic == IntrinsicName || attribute.Intrinsic == IntrinsicDuration {
 				continue
 			}
diff --git a/pkg/traceql/engine_test.go b/pkg/traceql/engine_test.go
index 30549a19098..ade817d9266 100644
--- a/pkg/traceql/engine_test.go
+++ b/pkg/traceql/engine_test.go
@@ -28,23 +28,23 @@ func TestEngine_Execute(t *testing.T) {
 	}
 	spanSetFetcher := MockSpanSetFetcher{
 		iterator: &MockSpanSetIterator{
-			results: []*SpansetMetadata{
+			results: []*Spanset{
 				{
 					TraceID:         []byte{1},
 					RootSpanName:    "HTTP GET",
 					RootServiceName: "my-service",
-					Spans: []SpanMetadata{
-						{
-							ID: []byte{1},
-							Attributes: map[Attribute]Static{
+					Spans: []Span{
+						&mockSpan{
+							id: []byte{1},
+							attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 							},
 						},
-						{
-							ID:                 []byte{2},
-							StartTimeUnixNanos: uint64(now.UnixNano()),
-							EndtimeUnixNanos:   uint64(now.Add(100 * time.Millisecond).UnixNano()),
-							Attributes: map[Attribute]Static{
+						&mockSpan{
+							id:                 []byte{2},
+							startTimeUnixNanos: uint64(now.UnixNano()),
+							endTimeUnixNanos:   uint64(now.Add(100 * time.Millisecond).UnixNano()),
+							attributes: map[Attribute]Static{
 								NewAttribute("foo"): NewStaticString("value"),
 								NewAttribute("bar"): NewStaticString("value"),
 							},
@@ -55,10 +55,10 @@ func TestEngine_Execute(t *testing.T) {
 					TraceID:         []byte{2},
 					RootSpanName:    "HTTP POST",
 					RootServiceName: "my-service",
-					Spans: []SpanMetadata{
-						{
-							ID: []byte{3},
-							Attributes: map[Attribute]Static{
+					Spans: []Span{
+						&mockSpan{
+							id: []byte{3},
+							attributes: map[Attribute]Static{
 								NewAttribute("bar"): NewStaticString("value"),
 							},
 						},
@@ -144,18 +144,18 @@ func TestEngine_asTraceSearchMetadata(t *testing.T) {
 	spanID1 := traceID[:8]
 	spanID2 := traceID[8:]
 
-	spanSet := &SpansetMetadata{
+	spanSet := &Spanset{
 		TraceID:            traceID,
 		RootServiceName:    "my-service",
 		RootSpanName:       "HTTP GET",
 		StartTimeUnixNanos: 1000,
 		DurationNanos:      uint64(time.Second.Nanoseconds()),
-		Spans: []SpanMetadata{
-			{
-				ID:                 spanID1,
-				StartTimeUnixNanos: uint64(now.UnixNano()),
-				EndtimeUnixNanos:   uint64(now.Add(10 * time.Second).UnixNano()),
-				Attributes: map[Attribute]Static{
+		Spans: []Span{
+			&mockSpan{
+				id:                 spanID1,
+				startTimeUnixNanos: uint64(now.UnixNano()),
+				endTimeUnixNanos:   uint64(now.Add(10 * time.Second).UnixNano()),
+				attributes: map[Attribute]Static{
 					NewIntrinsic(IntrinsicName):     NewStaticString("HTTP GET"),
 					NewIntrinsic(IntrinsicStatus):   NewStaticStatus(StatusOk),
 					NewAttribute("cluster"):         NewStaticString("prod"),
@@ -165,11 +165,11 @@ func TestEngine_asTraceSearchMetadata(t *testing.T) {
 					NewIntrinsic(IntrinsicDuration): NewStaticDuration(10 * time.Second),
 				},
 			},
-			{
-				ID:                 spanID2,
-				StartTimeUnixNanos: uint64(now.Add(2 * time.Second).UnixNano()),
-				EndtimeUnixNanos:   uint64(now.Add(20 * time.Second).UnixNano()),
-				Attributes:         map[Attribute]Static{},
+			&mockSpan{
+				id:                 spanID2,
+				startTimeUnixNanos: uint64(now.Add(2 * time.Second).UnixNano()),
+				endTimeUnixNanos:   uint64(now.Add(20 * time.Second).UnixNano()),
+				attributes:         map[Attribute]Static{},
 			},
 		},
 	}
@@ -269,18 +269,18 @@ func (m *MockSpanSetFetcher) Fetch(ctx context.Context, request FetchSpansReques
 }
 
 type MockSpanSetIterator struct {
-	results []*SpansetMetadata
+	results []*Spanset
 	filter  FilterSpans
 }
 
-func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error) {
+func (m *MockSpanSetIterator) Next(ctx context.Context) (*Spanset, error) {
 	if len(m.results) == 0 {
 		return nil, nil
 	}
 	r := m.results[0]
 	m.results = m.results[1:]
 
-	ss, err := m.filter(spansetFromMetaData(r))
+	ss, err := m.filter(r)
 	if err != nil {
 		return nil, err
 	}
@@ -292,17 +292,6 @@ func (m *MockSpanSetIterator) Next(ctx context.Context) (*SpansetMetadata, error
 	return r, nil
 }
 
-func spansetFromMetaData(meta *SpansetMetadata) Spanset {
-	ret := Spanset{}
-	for _, s := range meta.Spans {
-		s := &Span{
-			Attributes: s.Attributes,
-		}
-		ret.Spans = append(ret.Spans, s)
-	}
-	return ret
-}
-
 func newCondition(attr Attribute, op Operator, operands ...Static) Condition {
 	return Condition{
 		Attribute: attr,
diff --git a/pkg/traceql/storage.go b/pkg/traceql/storage.go
index ba7f1f18def..4383b4e943f 100644
--- a/pkg/traceql/storage.go
+++ b/pkg/traceql/storage.go
@@ -2,8 +2,6 @@ package traceql
 
 import (
 	"context"
-
-	"github.com/grafana/tempo/pkg/parquetquery"
 )
 
 type Operands []Static
@@ -19,7 +17,7 @@ type Condition struct {
 // spans it is discarded and will not appear in FetchSpansResponse. The bool
 // return value is used to indicate if the Fetcher should continue iterating or if
 // it can bail out.
-type FilterSpans func(Spanset) ([]Spanset, error)
+type FilterSpans func(*Spanset) ([]*Spanset, error)
 
 type FetchSpansRequest struct {
 	StartTimeUnixNanos uint64
@@ -48,40 +46,30 @@ func (f *FetchSpansRequest) appendCondition(c ...Condition) {
 	f.Conditions = append(f.Conditions, c...)
 }
 
-type Span struct {
-	// these metadata fields are used by the storage layer to fetch spans
-	// todo: make this an interface so that the storage layer can track this info w/o muddying the engine
-	RowNum             parquetquery.RowNumber
-	StartTimeUnixNanos uint64
-	EndtimeUnixNanos   uint64
-
+type Span interface {
 	// these are the actual fields used by the engine to evaluate queries
-	Attributes map[Attribute]Static
+	// if a Filter parameter is passed the spans returned will only have this field populated
+	Attributes() map[Attribute]Static
+
+	ID() []byte
+	StartTimeUnixNanos() uint64
+	EndtimeUnixNanos() uint64
 }
 
 type Spanset struct {
+	// these fields are actually used by the engine to evaluate queries
 	Scalar Static
-	Spans  []*Span
-}
-
-type SpanMetadata struct {
-	ID                 []byte
-	StartTimeUnixNanos uint64
-	EndtimeUnixNanos   uint64
-	Attributes         map[Attribute]Static
-}
+	Spans  []Span
 
-type SpansetMetadata struct {
 	TraceID            []byte
 	RootSpanName       string
 	RootServiceName    string
 	StartTimeUnixNanos uint64
 	DurationNanos      uint64
-	Spans              []SpanMetadata
 }
 
 type SpansetIterator interface {
-	Next(context.Context) (*SpansetMetadata, error)
+	Next(context.Context) (*Spanset, error)
 }
 
 type FetchSpansResponse struct {
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 7f6c569e69f..3ab18e506f3 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -17,6 +17,28 @@ import (
 	"github.com/grafana/tempo/tempodb/encoding/common"
 )
 
+// span implements traceql.Span
+type span struct {
+	attributes         map[traceql.Attribute]traceql.Static
+	id                 []byte
+	startTimeUnixNanos uint64
+	endtimeUnixNanos   uint64
+	rowNum             parquetquery.RowNumber
+}
+
+func (s *span) Attributes() map[traceql.Attribute]traceql.Static {
+	return s.attributes
+}
+func (s *span) ID() []byte {
+	return s.id
+}
+func (s *span) StartTimeUnixNanos() uint64 {
+	return s.startTimeUnixNanos
+}
+func (s *span) EndtimeUnixNanos() uint64 {
+	return s.endtimeUnixNanos
+}
+
 // todo: this sync pool currently massively reduces allocations by pooling spans for certain queries.
 // it currently catches spans discarded:
 // - in the span collector
@@ -26,27 +48,28 @@ import (
 // can return a slice of dropped and kept spansets?
 var spanPool = sync.Pool{
 	New: func() interface{} {
-		return &traceql.Span{
-			Attributes: make(map[traceql.Attribute]traceql.Static),
+		return &span{
+			attributes: make(map[traceql.Attribute]traceql.Static),
 		}
 	},
 }
 
-func putSpan(s *traceql.Span) {
-	s.EndtimeUnixNanos = 0
-	s.StartTimeUnixNanos = 0
-	s.RowNum = parquetquery.EmptyRowNumber()
+func putSpan(s *span) {
+	s.id = nil
+	s.endtimeUnixNanos = 0
+	s.startTimeUnixNanos = 0
+	s.rowNum = parquetquery.EmptyRowNumber()
 
 	// clear attributes
-	for k := range s.Attributes {
-		delete(s.Attributes, k)
+	for k := range s.attributes {
+		delete(s.attributes, k)
 	}
 
 	spanPool.Put(s)
 }
 
-func getSpan() *traceql.Span {
-	return spanPool.Get().(*traceql.Span)
+func getSpan() *span {
+	return spanPool.Get().(*span)
 }
 
 // Helper function to create an iterator, that abstracts away
@@ -205,7 +228,7 @@ type spansetIterator struct {
 	iter   parquetquery.Iterator
 	filter traceql.FilterSpans
 
-	currentSpans []*traceql.Span
+	currentSpans []*span
 }
 
 func newSpansetIterator(iter parquetquery.Iterator, filter traceql.FilterSpans) *spansetIterator {
@@ -215,7 +238,7 @@ func newSpansetIterator(iter parquetquery.Iterator, filter traceql.FilterSpans)
 	}
 }
 
-func (i *spansetIterator) Next() (*traceql.Span, error) {
+func (i *spansetIterator) Next() (*span, error) {
 	// drain current buffer
 	if len(i.currentSpans) > 0 {
 		ret := i.currentSpans[0]
@@ -245,20 +268,20 @@ func (i *spansetIterator) Next() (*traceql.Span, error) {
 		// TODO: the engine wants to work with value types, but the fetch layer is using pointers. as a result
 		//  there is some gross "bridge" code here that converts between the two. We should write benchmarks
 		//  and determine if moving to or the other is worth it
-		var filteredSpansets []traceql.Spanset
+		var filteredSpansets []*traceql.Spanset
 		if i.filter != nil {
-			filteredSpansets, err = i.filter(*spanset)
+			filteredSpansets, err = i.filter(spanset)
 			if err != nil {
 				return nil, err
 			}
 		} else {
-			filteredSpansets = []traceql.Spanset{*spanset}
+			filteredSpansets = []*traceql.Spanset{spanset}
 		}
 
 		// flatten spans into i.currentSpans
 		for _, ss := range filteredSpansets {
 			for _, s := range ss.Spans {
-				span := s
+				span := s.(*span)
 				i.currentSpans = append(i.currentSpans, span)
 			}
 		}
@@ -281,7 +304,7 @@ type mergeSpansetIterator struct {
 
 var _ traceql.SpansetIterator = (*mergeSpansetIterator)(nil)
 
-func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.SpansetMetadata, error) {
+func (i *mergeSpansetIterator) Next(ctx context.Context) (*traceql.Spanset, error) {
 	if i.cur >= len(i.iters) {
 		return nil, nil
 	}
@@ -1036,10 +1059,10 @@ func (c *spanCollector) String() string {
 
 func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	span := getSpan()
-	span.RowNum = res.RowNumber
+	span.rowNum = res.RowNumber
 
 	for _, e := range res.OtherEntries {
-		span.Attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
+		span.attributes[newSpanAttr(e.Key)] = e.Value.(traceql.Static)
 	}
 
 	var startTimeUnixNanos, endTimeUnixNanos uint64
@@ -1049,12 +1072,12 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		switch kv.Key {
 		case columnPathSpanStartTime:
 			startTimeUnixNanos = kv.Value.Uint64()
-			span.StartTimeUnixNanos = startTimeUnixNanos
+			span.startTimeUnixNanos = startTimeUnixNanos
 		case columnPathSpanEndTime:
 			endTimeUnixNanos = kv.Value.Uint64()
-			span.EndtimeUnixNanos = endTimeUnixNanos
+			span.endtimeUnixNanos = endTimeUnixNanos
 		case columnPathSpanName:
-			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
+			span.attributes[traceql.NewIntrinsic(traceql.IntrinsicName)] = traceql.NewStaticString(kv.Value.String())
 		//case columnPathSpanDuration:
 		//	span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(kv.Value.Uint64()))
 		case columnPathSpanStatusCode:
@@ -1071,25 +1094,25 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			default:
 				status = traceql.Status(kv.Value.Uint64())
 			}
-			span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
+			span.attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
 		case columnPathSpanKind:
 			// if we're actually retrieving kind then keep it, otherwise it's just being used to count spans and we should not
 			//  include it in our attributes
 			if !c.kindAsCount {
-				span.Attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
+				span.attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
 			}
 		default:
 			// TODO - This exists for span-level dedicated columns like http.status_code
 			// Are nils possible here?
 			switch kv.Value.Kind() {
 			case parquet.Boolean:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticBool(kv.Value.Boolean())
+				span.attributes[newSpanAttr(kv.Key)] = traceql.NewStaticBool(kv.Value.Boolean())
 			case parquet.Int32, parquet.Int64:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticInt(int(kv.Value.Int64()))
+				span.attributes[newSpanAttr(kv.Key)] = traceql.NewStaticInt(int(kv.Value.Int64()))
 			case parquet.Float:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticFloat(kv.Value.Double())
+				span.attributes[newSpanAttr(kv.Key)] = traceql.NewStaticFloat(kv.Value.Double())
 			case parquet.ByteArray:
-				span.Attributes[newSpanAttr(kv.Key)] = traceql.NewStaticString(kv.Value.String())
+				span.attributes[newSpanAttr(kv.Key)] = traceql.NewStaticString(kv.Value.String())
 			}
 		}
 	}
@@ -1099,7 +1122,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 		duration := endTimeUnixNanos - startTimeUnixNanos
 		for _, f := range c.durationFilters {
 			if f == nil || f.Fn(int64(duration)) {
-				span.Attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(duration))
+				span.attributes[traceql.NewIntrinsic(traceql.IntrinsicDuration)] = traceql.NewStaticDuration(time.Duration(duration))
 				break
 			}
 		}
@@ -1107,7 +1130,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	if c.minAttributes > 0 {
 		count := 0
-		for _, v := range span.Attributes {
+		for _, v := range span.attributes {
 			if v.Type != traceql.TypeNil {
 				count++
 			}
@@ -1133,7 +1156,7 @@ type batchCollector struct {
 
 	// shared static spans used in KeepGroup. done for memory savings, but won't
 	// work if the batchCollector is accessed concurrently
-	keepGroupSpans []*traceql.Span
+	keepGroupSpans []*span
 }
 
 var _ parquetquery.GroupPredicate = (*batchCollector)(nil)
@@ -1152,7 +1175,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	resAttrs := make(map[traceql.Attribute]traceql.Static)
 	for _, kv := range res.OtherEntries {
-		if span, ok := kv.Value.(*traceql.Span); ok {
+		if span, ok := kv.Value.(*span); ok {
 			c.keepGroupSpans = append(c.keepGroupSpans, span)
 			continue
 		}
@@ -1185,34 +1208,37 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// Copy resource-level attributes to the individual spans now
 	for k, v := range resAttrs {
 		for _, span := range c.keepGroupSpans {
-			if _, alreadyExists := span.Attributes[k]; !alreadyExists {
-				span.Attributes[k] = v
+			if _, alreadyExists := span.attributes[k]; !alreadyExists {
+				span.attributes[k] = v
 			}
 		}
 	}
 
 	// Remove unmatched attributes
 	for _, span := range c.keepGroupSpans {
-		for k, v := range span.Attributes {
+		for k, v := range span.attributes {
 			if v.Type == traceql.TypeNil {
-				delete(span.Attributes, k)
+				delete(span.attributes, k)
 			}
 		}
 	}
 
-	var filteredSpans []*traceql.Span
+	var filteredSpans []traceql.Span
 
 	// Copy over only spans that met minimum criteria
 	if c.requireAtLeastOneMatchOverall {
 		for _, span := range c.keepGroupSpans {
-			if len(span.Attributes) > 0 {
+			if len(span.attributes) > 0 {
 				filteredSpans = append(filteredSpans, span)
 				continue
 			}
 			putSpan(span)
 		}
 	} else {
-		filteredSpans = append([]*traceql.Span(nil), c.keepGroupSpans...)
+		filteredSpans = make([]traceql.Span, 0, len(c.keepGroupSpans))
+		for _, span := range c.keepGroupSpans {
+			filteredSpans = append([]traceql.Span(nil), span)
+		}
 	}
 
 	// Throw out batches without any spans
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index 9ace9ae33e5..b28caca241e 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -57,7 +57,7 @@ func (i *spansToMetaIterator) Next() (*pq.IteratorResult, error) {
 		return nil, nil
 	}
 
-	res := &pq.IteratorResult{RowNumber: next.RowNum}
+	res := &pq.IteratorResult{RowNumber: next.rowNum}
 	res.AppendOtherValue(otherEntrySpanKey, next)
 
 	return res, nil
@@ -91,35 +91,27 @@ func (c *spanMetaCollector) String() string {
 func (c *spanMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// extract the span from the iterator result and steal it's attributes
 	// this is where we convert a traceql.Span to a traceql.SpanMetadata
-	span, ok := res.OtherValueFromKey(otherEntrySpanKey).(*traceql.Span)
+	span, ok := res.OtherValueFromKey(otherEntrySpanKey).(*span)
 	if !ok {
 		return false // something very wrong happened. should we panic?
 	}
 
-	spanMetadata := &traceql.SpanMetadata{
-		StartTimeUnixNanos: span.StartTimeUnixNanos,
-		EndtimeUnixNanos:   span.EndtimeUnixNanos,
-		Attributes:         span.Attributes,
-	}
-	span.Attributes = make(map[traceql.Attribute]traceql.Static) // we have to overwrite the attributes b/c putSpan will attempt to reuse them
-	putSpan(span)
-
 	// span start/end time may come from span attributes or it may come from
 	// the iterator results. if we find it in the iterator results, use that
 	for _, kv := range res.Entries {
 		switch kv.Key {
 		case columnPathSpanID:
-			spanMetadata.ID = kv.Value.ByteArray()
+			span.id = kv.Value.ByteArray()
 		case columnPathSpanStartTime:
-			spanMetadata.StartTimeUnixNanos = kv.Value.Uint64()
+			span.startTimeUnixNanos = kv.Value.Uint64()
 		case columnPathSpanEndTime:
-			spanMetadata.EndtimeUnixNanos = kv.Value.Uint64()
+			span.endtimeUnixNanos = kv.Value.Uint64()
 		}
 	}
 
 	res.Entries = res.Entries[:0]
 	res.OtherEntries = res.OtherEntries[:0]
-	res.AppendOtherValue(otherEntrySpanKey, spanMetadata)
+	res.AppendOtherValue(otherEntrySpanKey, span)
 
 	return true
 }
@@ -137,7 +129,7 @@ func (c *traceMetaCollector) String() string {
 }
 
 func (c *traceMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
-	finalSpanset := &traceql.SpansetMetadata{}
+	finalSpanset := &traceql.Spanset{}
 
 	for _, e := range res.Entries {
 		switch e.Key {
@@ -157,8 +149,8 @@ func (c *traceMetaCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// we're copying spans directly from the spanMetaIterator into the traceMetaIterator
 	//  this skips the step of the batchIterator present on the normal fetch path
 	for _, e := range res.OtherEntries {
-		if span, ok := e.Value.(*traceql.SpanMetadata); ok {
-			finalSpanset.Spans = append(finalSpanset.Spans, *span)
+		if span, ok := e.Value.(*span); ok {
+			finalSpanset.Spans = append(finalSpanset.Spans, span)
 		}
 	}
 
@@ -183,7 +175,7 @@ func newSpansetMetadataIterator(iter parquetquery.Iterator) *spansetMetadataIter
 	}
 }
 
-func (i *spansetMetadataIterator) Next(ctx context.Context) (*traceql.SpansetMetadata, error) {
+func (i *spansetMetadataIterator) Next(ctx context.Context) (*traceql.Spanset, error) {
 	res, err := i.iter.Next()
 	if err != nil {
 		return nil, err
@@ -197,7 +189,7 @@ func (i *spansetMetadataIterator) Next(ctx context.Context) (*traceql.SpansetMet
 	if iface == nil {
 		return nil, fmt.Errorf("engine assumption broken: spanset not found in other entries")
 	}
-	ss, ok := iface.(*traceql.SpansetMetadata)
+	ss, ok := iface.(*traceql.Spanset)
 	if !ok {
 		return nil, fmt.Errorf("engine assumption broken: spanset is not of type *traceql.Spanset")
 	}
diff --git a/tempodb/encoding/vparquet/block_traceql_meta_test.go b/tempodb/encoding/vparquet/block_traceql_meta_test.go
index 35cfe4d0eb6..eef5b4d2fbc 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta_test.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/grafana/tempo/pkg/parquetquery"
 	"github.com/grafana/tempo/pkg/traceql"
 	"github.com/grafana/tempo/tempodb/encoding/common"
 	"github.com/stretchr/testify/require"
@@ -17,12 +18,12 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 
 	// Helper functions to make requests
 
-	makeSpansets := func(sets ...*traceql.SpansetMetadata) []*traceql.SpansetMetadata {
+	makeSpansets := func(sets ...*traceql.Spanset) []*traceql.Spanset {
 		return sets
 	}
 
-	makeSpanset := func(traceID []byte, rootSpanName, rootServiceName string, startTimeUnixNano, durationNanos uint64, spans ...traceql.SpanMetadata) *traceql.SpansetMetadata {
-		return &traceql.SpansetMetadata{
+	makeSpanset := func(traceID []byte, rootSpanName, rootServiceName string, startTimeUnixNano, durationNanos uint64, spans ...traceql.Span) *traceql.Spanset {
+		return &traceql.Spanset{
 			TraceID:            traceID,
 			RootSpanName:       rootSpanName,
 			RootServiceName:    rootServiceName,
@@ -34,7 +35,7 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 
 	testCases := []struct {
 		req             traceql.FetchSpansRequest
-		expectedResults []*traceql.SpansetMetadata
+		expectedResults []*traceql.Spanset
 	}{
 		{
 			// Empty request returns 1 spanset with all spans
@@ -46,17 +47,17 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes:         map[traceql.Attribute]traceql.Static{},
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes:         map[traceql.Attribute]traceql.Static{},
 					},
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes:         map[traceql.Attribute]traceql.Static{},
+					&span{
+						id:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes:         map[traceql.Attribute]traceql.Static{},
 					},
 				),
 			),
@@ -75,11 +76,11 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							// foo not returned because the span didn't match it
 							traceql.NewScopedAttribute(traceql.AttributeScopeSpan, false, "bar"): traceql.NewStaticInt(123),
 						},
@@ -100,11 +101,11 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							// Foo matched on resource.
 							// TODO - This seems misleading since the span has foo=<something else>
 							//        but for this query we never even looked at span attribute columns.
@@ -128,11 +129,11 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500), // This is the only attribute that matched anything
 						},
 					},
@@ -155,11 +156,11 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							newResAttr("foo"):                traceql.NewStaticString("abc"), // Both are returned
 							newSpanAttr("foo"):               traceql.NewStaticString("def"), // Both are returned
 							newSpanAttr(LabelHTTPStatusCode): traceql.NewStaticInt(500),
@@ -190,11 +191,11 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicName):   traceql.NewStaticString("world"),
 							traceql.NewIntrinsic(traceql.IntrinsicStatus): traceql.NewStaticStatus(traceql.StatusUnset),
 						},
@@ -212,19 +213,19 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 					wantTr.RootServiceName,
 					wantTr.StartTimeUnixNano,
 					wantTr.DurationNanos,
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[0].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(100 * time.Second),
 						},
 					},
-					traceql.SpanMetadata{
-						ID:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
-						StartTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
-						EndtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
-						Attributes: map[traceql.Attribute]traceql.Static{
+					&span{
+						id:                 wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].ID,
+						startTimeUnixNanos: wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].StartUnixNanos,
+						endtimeUnixNanos:   wantTr.ResourceSpans[1].ScopeSpans[0].Spans[0].EndUnixNanos,
+						attributes: map[traceql.Attribute]traceql.Static{
 							traceql.NewIntrinsic(traceql.IntrinsicDuration): traceql.NewStaticDuration(0 * time.Second),
 						},
 					},
@@ -239,7 +240,7 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 		require.NoError(t, err, "search request:", req)
 
 		// Turn iterator into slice
-		var ss []*traceql.SpansetMetadata
+		var ss []*traceql.Spanset
 		for {
 			spanSet, err := resp.Results.Next(ctx)
 			require.NoError(t, err)
@@ -249,6 +250,14 @@ func TestBackendBlockSearchFetchMetaData(t *testing.T) {
 			ss = append(ss, spanSet)
 		}
 
+		// equal will fail on the rownum mismatches. this is an internal detail to the
+		// fetch layer. just wipe them out here
+		for _, s := range ss {
+			for _, sp := range s.Spans {
+				sp.(*span).rowNum = parquetquery.RowNumber{}
+			}
+		}
+
 		require.Equal(t, tc.expectedResults, ss, "search request:", req)
 	}
 }

From ebe27ee2954c7171a5a5c9c7e398af2d45450a4e Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 27 Feb 2023 15:26:18 -0500
Subject: [PATCH 22/27] readded span ids

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/ast_execute_test.go | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/pkg/traceql/ast_execute_test.go b/pkg/traceql/ast_execute_test.go
index 088da16718d..212f602c075 100644
--- a/pkg/traceql/ast_execute_test.go
+++ b/pkg/traceql/ast_execute_test.go
@@ -157,18 +157,18 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 			[]*Spanset{
 				{Spans: []Span{
 					// This spanset will be kept because it satisfies both conditions
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// This spanset will be dropped
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{3}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 			[]*Spanset{
 				{Spans: []Span{
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},
@@ -176,22 +176,22 @@ func TestSpansetOperationEvaluate(t *testing.T) {
 			"{ .foo = `a` } || { .foo = `b` }",
 			[]*Spanset{
 				{Spans: []Span{
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
 					// Second span will be dropped
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
+					&mockSpan{id: []byte{3}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{4}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("c")}},
 				}},
 			},
 			[]*Spanset{
 				{Spans: []Span{
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("a")}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 				{Spans: []Span{
-					&mockSpan{attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
+					&mockSpan{id: []byte{3}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticString("b")}},
 				}},
 			},
 		},

From 8c0d60b802026465b7e2ca8cbebe0e8e2c31f4f3 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Mon, 27 Feb 2023 20:12:43 -0500
Subject: [PATCH 23/27] remove kindAsCount

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index 3ab18e506f3..f5e2df997d2 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -585,7 +585,6 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 	spanCol := &spanCollector{
 		minCount,
 		durationPredicates,
-		false,
 	}
 
 	// This is an optimization for when all of the span conditions must be met.
@@ -610,9 +609,7 @@ func createSpanIterator(makeIter makeIterFn, conditions []traceql.Condition, req
 	//  how do we know to pull duration for things like | avg(duration) > 1s? look at avg(span.http.status_code) it pushes a column request down here
 	//  the entire engine is built around spans. we have to return at least one entry for every span to the layers above for things to work
 	if len(required) == 0 {
-		required = []parquetquery.Iterator{makeIter(columnPathSpanKind, nil, columnPathSpanKind)}
-		// without this queries like { .cluster = "cluster" } fail b/c they rely on the span iterator to return something
-		spanCol.kindAsCount = true // signal to the collector we are only grabbing kind to count spans and we should not store it in attributes
+		required = []parquetquery.Iterator{makeIter(columnPathSpanKind, nil, "")}
 	}
 
 	// Left join here means the span id/start/end iterators + 1 are required,
@@ -1048,7 +1045,6 @@ func createAttributeIterator(makeIter makeIterFn, conditions []traceql.Condition
 type spanCollector struct {
 	minAttributes   int
 	durationFilters []*parquetquery.GenericPredicate[int64]
-	kindAsCount     bool
 }
 
 var _ parquetquery.GroupPredicate = (*spanCollector)(nil)
@@ -1096,11 +1092,7 @@ func (c *spanCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			}
 			span.attributes[traceql.NewIntrinsic(traceql.IntrinsicStatus)] = traceql.NewStaticStatus(status)
 		case columnPathSpanKind:
-			// if we're actually retrieving kind then keep it, otherwise it's just being used to count spans and we should not
-			//  include it in our attributes
-			if !c.kindAsCount {
-				span.attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
-			}
+			span.attributes[newSpanAttr(columnPathSpanKind)] = traceql.NewStaticInt(int(kv.Value.Int64()))
 		default:
 			// TODO - This exists for span-level dedicated columns like http.status_code
 			// Are nils possible here?

From 218ba991261afab58490a544f1df0bceb585d4b2 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 28 Feb 2023 08:26:30 -0500
Subject: [PATCH 24/27] fix optimization

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index f5e2df997d2..b87bd102818 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -1025,7 +1025,8 @@ func createAttributeIterator(makeIter makeIterFn, conditions []traceql.Condition
 		// Bring in any of the typed values as needed.
 
 		// if all conditions must be true we can use a simple join iterator to test the values one column at a time.
-		if allConditions {
+		// len(valueIters) must be 1 to handle queries like `{ span.foo = "x" && span.bar > 1}`
+		if allConditions && len(valueIters) == 1 {
 			iters := append([]parquetquery.Iterator{makeIter(keyPath, parquetquery.NewStringInPredicate(attrKeys), "key")}, valueIters...)
 			return parquetquery.NewJoinIterator(definitionLevel,
 				iters,

From 00b2fd2df3e0e9444ded97066d63a3be84460c42 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 28 Feb 2023 08:32:12 -0500
Subject: [PATCH 25/27] tier out close

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 tempodb/encoding/vparquet/block_traceql.go    | 27 ++++++++++++-------
 .../encoding/vparquet/block_traceql_meta.go   |  5 ++--
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index b87bd102818..f7ebadc518c 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -14,6 +14,7 @@ import (
 	"github.com/grafana/tempo/pkg/parquetquery"
 	v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
 	"github.com/grafana/tempo/pkg/traceql"
+	"github.com/grafana/tempo/pkg/util"
 	"github.com/grafana/tempo/tempodb/encoding/common"
 )
 
@@ -238,6 +239,10 @@ func newSpansetIterator(iter parquetquery.Iterator, filter traceql.FilterSpans)
 	}
 }
 
+func (i *spansetIterator) String() string {
+	return fmt.Sprintf("spansetIterator: \n\t%s", util.TabOut(i.iter))
+}
+
 func (i *spansetIterator) Next() (*span, error) {
 	// drain current buffer
 	if len(i.currentSpans) > 0 {
@@ -295,6 +300,10 @@ func (i *spansetIterator) Next() (*span, error) {
 	}
 }
 
+func (i *spansetIterator) Close() {
+	i.iter.Close()
+}
+
 // mergeSpansetIterator iterates through a slice of spansetIterators exhausting them
 // in order
 type mergeSpansetIterator struct {
@@ -1149,7 +1158,7 @@ type batchCollector struct {
 
 	// shared static spans used in KeepGroup. done for memory savings, but won't
 	// work if the batchCollector is accessed concurrently
-	keepGroupSpans []*span
+	buffer []*span
 }
 
 var _ parquetquery.GroupPredicate = (*batchCollector)(nil)
@@ -1164,12 +1173,12 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	// flattens it into 1 spanset per trace.  All we really need
 	// todo is merge the resource-level attributes onto the spans
 	// and filter out spans that didn't match anything.
-	c.keepGroupSpans = c.keepGroupSpans[:0]
+	c.buffer = c.buffer[:0]
 
 	resAttrs := make(map[traceql.Attribute]traceql.Static)
 	for _, kv := range res.OtherEntries {
 		if span, ok := kv.Value.(*span); ok {
-			c.keepGroupSpans = append(c.keepGroupSpans, span)
+			c.buffer = append(c.buffer, span)
 			continue
 		}
 
@@ -1178,7 +1187,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Throw out batches without any spans
-	if len(c.keepGroupSpans) == 0 {
+	if len(c.buffer) == 0 {
 		return false
 	}
 
@@ -1200,7 +1209,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	// Copy resource-level attributes to the individual spans now
 	for k, v := range resAttrs {
-		for _, span := range c.keepGroupSpans {
+		for _, span := range c.buffer {
 			if _, alreadyExists := span.attributes[k]; !alreadyExists {
 				span.attributes[k] = v
 			}
@@ -1208,7 +1217,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	}
 
 	// Remove unmatched attributes
-	for _, span := range c.keepGroupSpans {
+	for _, span := range c.buffer {
 		for k, v := range span.attributes {
 			if v.Type == traceql.TypeNil {
 				delete(span.attributes, k)
@@ -1220,7 +1229,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 
 	// Copy over only spans that met minimum criteria
 	if c.requireAtLeastOneMatchOverall {
-		for _, span := range c.keepGroupSpans {
+		for _, span := range c.buffer {
 			if len(span.attributes) > 0 {
 				filteredSpans = append(filteredSpans, span)
 				continue
@@ -1228,8 +1237,8 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 			putSpan(span)
 		}
 	} else {
-		filteredSpans = make([]traceql.Span, 0, len(c.keepGroupSpans))
-		for _, span := range c.keepGroupSpans {
+		filteredSpans = make([]traceql.Span, 0, len(c.buffer))
+		for _, span := range c.buffer {
 			filteredSpans = append([]traceql.Span(nil), span)
 		}
 	}
diff --git a/tempodb/encoding/vparquet/block_traceql_meta.go b/tempodb/encoding/vparquet/block_traceql_meta.go
index b28caca241e..1ead0fce3b6 100644
--- a/tempodb/encoding/vparquet/block_traceql_meta.go
+++ b/tempodb/encoding/vparquet/block_traceql_meta.go
@@ -7,6 +7,7 @@ import (
 	"github.com/grafana/tempo/pkg/parquetquery"
 	pq "github.com/grafana/tempo/pkg/parquetquery"
 	"github.com/grafana/tempo/pkg/traceql"
+	"github.com/grafana/tempo/pkg/util"
 )
 
 func createSpansetMetaIterator(makeIter makeIterFn, ss *spansetIterator, spanStartEndRetreived bool) (*spansetMetadataIterator, error) {
@@ -44,7 +45,7 @@ type spansToMetaIterator struct {
 var _ pq.Iterator = (*spansToMetaIterator)(nil)
 
 func (i *spansToMetaIterator) String() string {
-	return fmt.Sprintf("spansToMetaIterator: \n\t%s", i.iter.iter.String())
+	return fmt.Sprintf("spansToMetaIterator: \n\t%s", util.TabOut(i.iter))
 }
 
 func (i *spansToMetaIterator) Next() (*pq.IteratorResult, error) {
@@ -74,7 +75,7 @@ func (i *spansToMetaIterator) SeekTo(to pq.RowNumber, definitionLevel int) (*pq.
 }
 
 func (i *spansToMetaIterator) Close() {
-	i.iter.iter.Close()
+	i.iter.Close()
 }
 
 // spanMetaCollector collects iterator results with the expectation that they were created

From 110f716843cd089f622f13461843654f2b32e149 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Tue, 28 Feb 2023 09:13:20 -0500
Subject: [PATCH 26/27] patched up tests

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/ast_execute_test.go | 142 ++++++++++++++++----------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/pkg/traceql/ast_execute_test.go b/pkg/traceql/ast_execute_test.go
index 55cd6616288..a3d608811c6 100644
--- a/pkg/traceql/ast_execute_test.go
+++ b/pkg/traceql/ast_execute_test.go
@@ -302,57 +302,57 @@ func TestScalarFilterEvaluate(t *testing.T) {
 func TestBinaryOperationWorksWithFloatsAndInts(t *testing.T) {
 	testCases := []struct {
 		query  string
-		input  []Spanset
-		output []Spanset
+		input  []*Spanset
+		output []*Spanset
 	}{
 		// binops work int -> float
 		{
 			"{ .foo > 0 }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ .foo < 2 }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ .foo = 1 }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
@@ -360,51 +360,51 @@ func TestBinaryOperationWorksWithFloatsAndInts(t *testing.T) {
 		// binops work float -> int
 		{
 			"{ .foo > 0. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ .foo < 2. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ .foo = 1. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
@@ -412,51 +412,51 @@ func TestBinaryOperationWorksWithFloatsAndInts(t *testing.T) {
 		// binops work with statics
 		{
 			"{ 1 > 0. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ 0 < 2. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
 		},
 		{
 			"{ 1 = 1. }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{
 					Spans: []Span{
-						{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
-						{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
+						&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1)}},
+						&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticFloat(1)}},
 					},
 				},
 			},
@@ -464,43 +464,43 @@ func TestBinaryOperationWorksWithFloatsAndInts(t *testing.T) {
 		// binops work with attributes
 		{
 			"{ .foo < .bar }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
 				}},
 			},
 		},
 		{
 			"{ .bar > .foo }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(2)}},
 				}},
 			},
 		},
 		{
 			"{ .foo = .bar }",
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(1)}},
-					{ID: []byte{2}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{2}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(2), NewAttribute("bar"): NewStaticFloat(1)}},
 				}},
 			},
-			[]Spanset{
+			[]*Spanset{
 				{Spans: []Span{
-					{ID: []byte{1}, Attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(1)}},
+					&mockSpan{id: []byte{1}, attributes: map[Attribute]Static{NewAttribute("foo"): NewStaticInt(1), NewAttribute("bar"): NewStaticFloat(1)}},
 				}},
 			},
 		},

From 0388a62a573515f452e49cdeb4f5e9de02c4bd79 Mon Sep 17 00:00:00 2001
From: Joe Elliott <number101010@gmail.com>
Date: Fri, 3 Mar 2023 12:45:30 -0500
Subject: [PATCH 27/27] review

Signed-off-by: Joe Elliott <number101010@gmail.com>
---
 pkg/traceql/engine.go                      | 9 +++++----
 tempodb/encoding/vparquet/block_traceql.go | 5 +----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/pkg/traceql/engine.go b/pkg/traceql/engine.go
index 8c5d7dccdf2..f08c1263166 100644
--- a/pkg/traceql/engine.go
+++ b/pkg/traceql/engine.go
@@ -3,6 +3,7 @@ package traceql
 import (
 	"context"
 	"fmt"
+	"io"
 	"time"
 
 	"github.com/opentracing/opentracing-go"
@@ -77,13 +78,13 @@ func (e *Engine) Execute(ctx context.Context, searchReq *tempopb.SearchRequest,
 	}
 	for {
 		spanset, err := iterator.Next(ctx)
-		if spanset == nil {
-			break
-		}
-		if err != nil {
+		if err != nil && err != io.EOF {
 			span.LogKV("msg", "iterator.Next", "err", err)
 			return nil, err
 		}
+		if spanset == nil {
+			break
+		}
 		res.Traces = append(res.Traces, e.asTraceSearchMetadata(spanset))
 
 		if len(res.Traces) >= int(searchReq.Limit) && searchReq.Limit > 0 {
diff --git a/tempodb/encoding/vparquet/block_traceql.go b/tempodb/encoding/vparquet/block_traceql.go
index f7ebadc518c..b41caf2eb02 100644
--- a/tempodb/encoding/vparquet/block_traceql.go
+++ b/tempodb/encoding/vparquet/block_traceql.go
@@ -270,9 +270,6 @@ func (i *spansetIterator) Next() (*span, error) {
 			return nil, fmt.Errorf("engine assumption broken: spanset is not of type *traceql.Spanset")
 		}
 
-		// TODO: the engine wants to work with value types, but the fetch layer is using pointers. as a result
-		//  there is some gross "bridge" code here that converts between the two. We should write benchmarks
-		//  and determine if moving to or the other is worth it
 		var filteredSpansets []*traceql.Spanset
 		if i.filter != nil {
 			filteredSpansets, err = i.filter(spanset)
@@ -1239,7 +1236,7 @@ func (c *batchCollector) KeepGroup(res *parquetquery.IteratorResult) bool {
 	} else {
 		filteredSpans = make([]traceql.Span, 0, len(c.buffer))
 		for _, span := range c.buffer {
-			filteredSpans = append([]traceql.Span(nil), span)
+			filteredSpans = append(filteredSpans, span)
 		}
 	}