Skip to content

Commit

Permalink
Merge branch 'main' into chaudum/helm-fix-bloom-planner-builder-storage
Browse files Browse the repository at this point in the history
  • Loading branch information
chaudum authored Sep 17, 2024
2 parents ae5085e + 78b275b commit 00cb557
Show file tree
Hide file tree
Showing 49 changed files with 1,475 additions and 441 deletions.
26 changes: 0 additions & 26 deletions .github/workflows/backport.yml

This file was deleted.

211 changes: 211 additions & 0 deletions .github/workflows/helm-loki-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
---
name: helm-loki-ci
on:
pull_request:
paths:
- "production/helm/loki/**"

jobs:
publish-diff:
name: Publish Rendered Helm Chart Diff
runs-on: ubuntu-latest
steps:
- name: Setup Helm
uses: azure/setup-helm@v4

- name: Add required Helm repositories
run: |
# Register the chart repositories used by this workflow
# (presumably so that `helm dependency build` in the render steps can resolve
# the Loki chart's dependencies — confirm against the chart's Chart.yaml).
helm repo add minio https://charts.min.io/
helm repo add grafana https://grafana.github.io/helm-charts
# NOTE(review): this alias points at the same URL as the "grafana" repo above —
# confirm that a separate "grafana-operator" alias is really required.
helm repo add grafana-operator https://grafana.github.io/helm-charts
# Refresh the local index of all repositories added above.
helm repo update
- name: Prepare directories for base and PR branches
run: |
# Create the checkout targets ("base", "pr") and the per-branch render output
# directories. `-p` also creates the shared "output" parent, so it does not
# need a separate call. Paths are quoted so a workspace path containing
# whitespace cannot be split into several arguments.
mkdir -p \
  "${{ github.workspace }}/base" \
  "${{ github.workspace }}/pr" \
  "${{ github.workspace }}/output/base" \
  "${{ github.workspace }}/output/pr"
# Check out the pull request's target (base) branch into the 'base' folder.
# Its rendered manifests are the baseline the PR output is compared against.
- name: Checkout base branch to 'base' folder within workspace
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.base.ref }}
path: ${{ github.workspace }}/base

# Check out the pull request's head commit into the 'pr' folder.
- name: Checkout PR branch to 'pr' folder within workspace
  uses: actions/checkout@v4
  with:
    # Use the head commit SHA rather than the head branch name: for pull
    # requests opened from a fork the branch name does not exist in this
    # repository (or, worse, matches an unrelated branch such as "main"),
    # whereas the head SHA is always fetchable through the pull request ref.
    ref: ${{ github.event.pull_request.head.sha }}
    path: ${{ github.workspace }}/pr

- name: Render Helm chart for each scenario in the base branch
run: |
# Render the Loki chart of the BASE branch once per scenario values file.
# Output goes to <workspace>/output/base/<scenario>/, where <scenario> is the
# values file name without its .yaml extension.
cd "${{ github.workspace }}/base/production/helm/loki"
# Older base branches may predate the scenarios directory; in that case there
# is nothing to render and every PR manifest is later reported as "added".
if [[ ! -d "scenarios" ]]; then
  echo "Directory with the scenarios does not exist in base branch, skipping rendering them."
  exit 0
fi
helm dependency build
for file in scenarios/*.yaml; do
  # Print the scenario values into the job log for easier debugging.
  cat "$file"
  scenario_folder="${{ github.workspace }}/output/base/$(basename "$file" .yaml)"
  # -p keeps the step re-runnable if the directory already exists.
  mkdir -p "$scenario_folder"
  helm template loki-test-chart-name . -f "$file" --output-dir "$scenario_folder"
done
- name: Render Helm chart for each scenario in the PR branch
run: |
# Render the Loki chart of the PR branch once per scenario values file.
# Output goes to <workspace>/output/pr/<scenario>/, where <scenario> is the
# values file name without its .yaml extension.
cd "${{ github.workspace }}/pr/production/helm/loki"
helm dependency build
for file in scenarios/*.yaml; do
  # Print the scenario values into the job log for easier debugging.
  cat "$file"
  scenario_folder="${{ github.workspace }}/output/pr/$(basename "$file" .yaml)"
  # -p keeps the step re-runnable if the directory already exists.
  mkdir -p "$scenario_folder"
  helm template loki-test-chart-name . -f "$file" --output-dir "$scenario_folder"
done
- name: Calculate the diff between base and PR rendered manifests for each scenario
run: |
# For every scenario of the PR branch, compare the manifests rendered from the
# base branch with those rendered from the PR branch and classify each file as
# added, modified or removed. The result is written as three JSON arrays to
# <workspace>/output/<scenario>/{added,modified,removed}_files.json:
#   added / removed entries: {"filename": ..., "content": ...}
#   modified entries:        {"filename": ..., "diff": ...}
cd "${{ github.workspace }}/pr/production/helm/loki"
for scenario_file in scenarios/*.yaml; do
  added_files='[]'
  modified_files='[]'
  removed_files='[]'
  scenario_name=$(basename "$scenario_file" .yaml)
  base_branch_dir="${{ github.workspace }}/output/base/$scenario_name"
  pr_branch_dir="${{ github.workspace }}/output/pr/$scenario_name"
  echo "Comparing directories: $base_branch_dir and $pr_branch_dir"

  # List the rendered files of both sides as paths relative to their root.
  # The base directory is absent when the base branch has no such scenario.
  base_branch_files=""
  if [[ -d "$base_branch_dir" ]]; then
    base_branch_files=$(find "$base_branch_dir" -type f | sed "s|$base_branch_dir/||")
  fi
  pr_branch_files=$(find "$pr_branch_dir" -type f | sed "s|$pr_branch_dir/||")

  # Check for modified and removed files.
  # Read line by line so that paths containing whitespace stay intact.
  while IFS= read -r file; do
    # An empty list still yields one empty line from the here-string.
    [[ -n "$file" ]] || continue
    echo "check if file exists: $file"
    if [[ -f "$pr_branch_dir/$file" ]]; then
      echo "File exists in both directories, check if it is modified"
      if ! diff -q "$base_branch_dir/$file" "$pr_branch_dir/$file" >/dev/null; then
        echo "file is modified $file"
        # diff exits non-zero when the files differ; that is expected here.
        file_diff=$(diff -c "$base_branch_dir/$file" "$pr_branch_dir/$file" || true)
        modified_files=$(jq --arg file "$file" --arg diff "$file_diff" \
          '. += [{"filename": $file, "diff": $diff}]' <<< "$modified_files")
      else
        echo "file is not modified"
      fi
    else
      echo "file is removed $file"
      # File is missing in the PR directory.
      file_content=$(cat "$base_branch_dir/$file")
      removed_files=$(jq --arg filename "$file" --arg content "$file_content" \
        '. += [{"filename": $filename, "content": $content}]' <<< "$removed_files")
    fi
  done <<< "$base_branch_files"

  # Check for added files, i.e. files that only exist on the PR side.
  while IFS= read -r file; do
    [[ -n "$file" ]] || continue
    if [[ ! -f "$base_branch_dir/$file" ]]; then
      echo "added file detected"
      # File is missing in the base directory.
      file_content=$(cat "$pr_branch_dir/$file")
      added_files=$(jq --arg file "$file" --arg content "$file_content" \
        '. += [{"filename": $file, "content": $content}]' <<< "$added_files")
    fi
  done <<< "$pr_branch_files"

  scenario_output_dir="${{ github.workspace }}/output/$scenario_name"
  mkdir -p "$scenario_output_dir"
  # Write the JSON verbatim. An unquoted `echo $var` would word-split the
  # document, collapsing consecutive spaces inside string values (which ruins
  # YAML indentation and diff columns) and glob-expanding words such as "*".
  printf '%s\n' "$added_files" > "$scenario_output_dir/added_files.json"
  printf '%s\n' "$modified_files" > "$scenario_output_dir/modified_files.json"
  printf '%s\n' "$removed_files" > "$scenario_output_dir/removed_files.json"
  # Also show the removed files in the job log.
  printf '%s\n' "$removed_files"
done
- name: Generate Markdown Summary
run: |
# Build <workspace>/output/diff_summary.md: one collapsible <details> section
# per scenario, listing the added, modified and removed manifests that the
# previous step stored as JSON under <workspace>/output/<scenario>/.
output_file="${{ github.workspace }}/output/diff_summary.md"

#######################################
# Append one "### <Title> Files" section to $output_file.
# Globals:   output_file (read)
# Arguments: $1 - section title: Added, Modified or Removed
#            $2 - JSON array of objects that carry a "filename" key
#            $3 - key of the text to display ("content" or "diff")
#            $4 - language tag for the fenced code block ("yaml" or "diff")
# Outputs:   appends Markdown to $output_file
#######################################
append_section() {
  local title=$1 entries=$2 key=$3 fence=$4
  local count obj filename body
  count=$(jq length <<< "$entries")
  printf '\n### %s Files\n' "$title" >> "$output_file"
  if [[ "$count" -gt 0 ]]; then
    while IFS= read -r obj; do
      filename=$(jq -r '.filename' <<< "$obj")
      body=$(jq -r --arg key "$key" '.[$key]' <<< "$obj")
      printf '\n<details><summary>%s</summary>\n' "$filename" >> "$output_file"
      # Pass the text through %s: `echo -e` would interpret backslash
      # sequences (e.g. "\n", "\t") that occur inside manifests and diffs.
      printf '\n```%s\n%s\n```\n</details>\n' "$fence" "$body" >> "$output_file"
    done < <(jq -c '.[]' <<< "$entries")
  else
    # "${title,,}" lower-cases the title: "_No added files_" etc.
    printf '\n_No %s files_\n\n' "${title,,}" >> "$output_file"
  fi
}

# Initialize the Markdown output file
echo "# Kubernetes Manifest Diff Summary" > "$output_file"

# Iterate over each scenario file
for file in "${{ github.workspace }}"/pr/production/helm/loki/scenarios/*.yaml; do
  scenario=$(basename "$file" .yaml)
  echo "Processing scenario: $scenario"

  # Read JSON data for added, modified, and removed files
  added_files=$(cat "${{ github.workspace }}/output/$scenario/added_files.json")
  modified_files=$(cat "${{ github.workspace }}/output/$scenario/modified_files.json")
  removed_files=$(cat "${{ github.workspace }}/output/$scenario/removed_files.json")

  # Count the number of added, modified, and removed files
  num_added=$(jq length <<< "$added_files")
  num_modified=$(jq length <<< "$modified_files")
  num_removed=$(jq length <<< "$removed_files")

  {
    # Collapsible header for the scenario; the blank lines are required for
    # Markdown to be rendered inside the HTML elements.
    printf '\n<details><summary>Scenario: %s (Added: %s, Modified: %s, Removed: %s) </summary>\n\n' \
      "$scenario" "$num_added" "$num_modified" "$num_removed"
    printf '<p>\n\n\n'

    # Summary counts
    printf '\n**Summary:**\n'
    printf '\n- **Added:** %s\n' "$num_added"
    printf '\n- **Modified:** %s\n' "$num_modified"
    printf '\n- **Removed:** %s\n' "$num_removed"
  } >> "$output_file"

  append_section "Added" "$added_files" content yaml
  append_section "Modified" "$modified_files" diff diff
  append_section "Removed" "$removed_files" content yaml

  # close <p> and <details>
  printf '\n\n</p>\n</details>\n' >> "$output_file"
done

# Publish the Markdown summary generated by the previous step
# (output/diff_summary.md) as a comment on the pull request.
# NOTE(review): hide_and_recreate / hide_classify presumably hide the previous
# summary comment as "OUTDATED" and post a fresh one on every run — confirm
# against the sticky-pull-request-comment documentation.
- name: Post diff as PR comment
uses: marocchino/sticky-pull-request-comment@v2
with:
hide_and_recreate: true
hide_classify: "OUTDATED"
path: ${{ github.workspace }}/output/diff_summary.md
2 changes: 1 addition & 1 deletion .github/workflows/metrics-collector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:

jobs:
main:
if: github.owner == "grafana"
if: github.owner == 'grafana'
runs-on: ubuntu-latest
steps:
- name: Checkout Actions
Expand Down
12 changes: 6 additions & 6 deletions pkg/bloomgateway/bloomgateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,12 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk
return nil, errors.New("from time must not be after through time")
}

filters := v1.ExtractTestableLineFilters(req.Plan.AST)
stats.NumFilters = len(filters)
g.metrics.receivedFilters.Observe(float64(len(filters)))
matchers := v1.ExtractTestableLabelMatchers(req.Plan.AST)
stats.NumMatchers = len(matchers)
g.metrics.receivedMatchers.Observe(float64(len(matchers)))

// Shortcut if request does not contain filters
if len(filters) == 0 {
if len(matchers) == 0 {
stats.Status = labelSuccess
return &logproto.FilterChunkRefResponse{
ChunkRefs: req.Refs,
Expand Down Expand Up @@ -227,7 +227,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk
stats.NumTasks = len(seriesByDay)

sp.LogKV(
"filters", len(filters),
"matchers", len(matchers),
"days", len(seriesByDay),
"blocks", len(req.Blocks),
"series_requested", len(req.Refs),
Expand All @@ -239,7 +239,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk
}

series := seriesByDay[0]
task := newTask(ctx, tenantID, series, filters, blocks)
task := newTask(ctx, tenantID, series, matchers, blocks)

// TODO(owen-d): include capacity in constructor?
task.responses = responsesPool.Get(len(series.series))
Expand Down
24 changes: 11 additions & 13 deletions pkg/bloomgateway/bloomgateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {

chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100)

expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`)
expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`)
require.NoError(t, err)

req := &logproto.FilterChunkRefRequest{
Expand Down Expand Up @@ -196,7 +196,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
// saturate workers
// then send additional request
for i := 0; i < gw.cfg.WorkerConcurrency+1; i++ {
expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`)
expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`)
require.NoError(t, err)

req := &logproto.FilterChunkRefRequest{
Expand Down Expand Up @@ -240,7 +240,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
// saturate workers
// then send additional request
for i := 0; i < gw.cfg.WorkerConcurrency+1; i++ {
expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`)
expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`)
require.NoError(t, err)

req := &logproto.FilterChunkRefRequest{
Expand Down Expand Up @@ -341,7 +341,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
Checksum: uint32(idx),
},
}
expr, err := syntax.ParseExpr(`{foo="bar"} |= "foo"`)
expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`)
require.NoError(t, err)
req := &logproto.FilterChunkRefRequest{
From: now.Add(-4 * time.Hour),
Expand Down Expand Up @@ -380,7 +380,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {

t.Run("no match - return empty response", func(t *testing.T) {
inputChunkRefs := groupRefs(t, chunkRefs)
expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`)
expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`)
require.NoError(t, err)
req := &logproto.FilterChunkRefRequest{
From: now.Add(-8 * time.Hour),
Expand All @@ -403,16 +403,14 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) {
inputChunkRefs := groupRefs(t, chunkRefs)
// Hack to get search string for a specific series
// see MkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go
// each series has 1 chunk
// each chunk has multiple strings, from int(fp) to int(nextFp)-1
x := rand.Intn(len(inputChunkRefs))
fp := inputChunkRefs[x].Fingerprint
chks := inputChunkRefs[x].Refs
line := fmt.Sprintf("%04x:%04x", int(fp), 0) // first line
rnd := rand.Intn(len(inputChunkRefs))
fp := inputChunkRefs[rnd].Fingerprint
chks := inputChunkRefs[rnd].Refs
key := fmt.Sprintf("%s:%04x", model.Fingerprint(fp), 0)

t.Log("x=", x, "fp=", fp, "line=", line)
t.Log("rnd=", rnd, "fp=", fp, "key=", key)

expr, err := syntax.ParseExpr(fmt.Sprintf(`{foo="bar"} |= "%s"`, line))
expr, err := syntax.ParseExpr(fmt.Sprintf(`{foo="bar"} | trace_id="%s"`, key))
require.NoError(t, err)

req := &logproto.FilterChunkRefRequest{
Expand Down
8 changes: 4 additions & 4 deletions pkg/bloomgateway/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ type serverMetrics struct {
filteredSeries prometheus.Histogram
requestedChunks prometheus.Histogram
filteredChunks prometheus.Histogram
receivedFilters prometheus.Histogram
receivedMatchers prometheus.Histogram
}

func newMetrics(registerer prometheus.Registerer, namespace, subsystem string) *metrics {
Expand Down Expand Up @@ -105,11 +105,11 @@ func newServerMetrics(registerer prometheus.Registerer, namespace, subsystem str
Help: "Total amount of chunk refs filtered by bloom-gateway",
Buckets: prometheus.ExponentialBucketsRange(1, 100e3, 10),
}),
receivedFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
receivedMatchers: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "request_filters",
Help: "Number of filters per request.",
Name: "request_matchers",
Help: "Number of matchers per request.",
Buckets: prometheus.ExponentialBuckets(1, 2, 9), // 1 -> 256
}),
}
Expand Down
Loading

0 comments on commit 00cb557

Please sign in to comment.