-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds a new `audit index` command to the `lokitool` cmd. The new `audit index` command validates that all chunks required by a given index are available in object storage. This is useful to check whether you're missing data after a backfill or when migrating data from one Loki instance to another. See `pkg/tool/audit/README.md` for usage instructions.
- Loading branch information
1 parent
71507a2
commit 47f0236
Showing
40 changed files
with
18,109 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Loki Index Auditing | ||
|
||
## Usage | ||
|
||
To audit your index data: | ||
1. Make sure you're authenticated to the cloud provider where your bucket lives. | ||
In this example I'll be using GCP. | ||
2. Create a new YAML configuration file that defines your storage configuration. | ||
`lokitool` will use it to communicate with your data. | ||
Only TSDB is supported. Make sure you give all three fields: `schema_config`, `storage_config` and `tenant`. In this example I'm naming my file `configfile.yaml`: | ||
```yaml | ||
schema_config: | ||
configs: | ||
- from: "2023-08-21" | ||
index: | ||
period: 24h | ||
prefix: loki_env_tsdb_index_ | ||
object_store: gcs | ||
schema: v13 | ||
store: tsdb | ||
|
||
storage_config: | ||
gcs: | ||
bucket_name: loki-bucket | ||
|
||
tenant: 12345 | ||
``` | ||
3. Build a new `lokitool` binary: | ||
```bash | ||
go build ./cmd/lokitool | ||
``` | ||
4. Finally, invoke the `audit index` command the following way: | ||
```bash | ||
./lokitool audit index --period=19856 --config.file=configfile.yaml --index.file=index/loki_env_tsdb_index_19856/12345/1715707992714992001-compactor-1715199977885-1815707796275-g8003361.tsdb.gz | ||
``` | ||
The `--period` is the period of the index being audited. You can find it by checking the 5-digit number appended | ||
as a suffix of the Loki environment name in the index file. Example: For `index/loki_env_tsdb_index_19856/12345/...`, | ||
the period is 19856. | ||
The `--config.file` is the YAML configuration described in the second step. | ||
The `--index.file` is the path to the index file you want to audit. Take a look at your bucket to see its exact path and substitute it accordingly. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
package audit | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"io" | ||
"path" | ||
"strings" | ||
"time" | ||
|
||
"github.com/go-kit/log" | ||
"github.com/go-kit/log/level" | ||
progressbar "github.com/schollz/progressbar/v3" | ||
"go.uber.org/atomic" | ||
"golang.org/x/sync/errgroup" | ||
|
||
"github.com/grafana/loki/v3/pkg/compactor" | ||
"github.com/grafana/loki/v3/pkg/compactor/retention" | ||
"github.com/grafana/loki/v3/pkg/storage" | ||
loki_storage "github.com/grafana/loki/v3/pkg/storage" | ||
"github.com/grafana/loki/v3/pkg/storage/chunk/client" | ||
indexshipper_storage "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" | ||
shipperutil "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" | ||
"github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb" | ||
util_log "github.com/grafana/loki/v3/pkg/util/log" | ||
) | ||
|
||
// TsFormat is the RFC 3339 timestamp layout with nanosecond precision
// (time.RFC3339Nano).
const TsFormat = time.RFC3339Nano
|
||
func Run(ctx context.Context, cloudIndexPath, table string, cfg Config, logger log.Logger) (int, int, error) { | ||
level.Info(logger).Log("msg", "auditing index", "index", cloudIndexPath, "table", table, "tenant", cfg.Tenant, "working_dir", cfg.WorkingDir) | ||
|
||
objClient, err := GetObjectClient(cfg) | ||
if err != nil { | ||
return 0, 0, err | ||
} | ||
|
||
localFile, err := DownloadIndexFile(ctx, cfg, cloudIndexPath, objClient, logger) | ||
if err != nil { | ||
return 0, 0, err | ||
} | ||
|
||
compactedIdx, err := ParseCompactexIndex(ctx, localFile, table, cfg) | ||
if err != nil { | ||
return 0, 0, err | ||
} | ||
defer compactedIdx.Cleanup() | ||
|
||
return ValidateCompactedIndex(ctx, objClient, compactedIdx, cfg.Concurrency, logger) | ||
} | ||
|
||
func GetObjectClient(cfg Config) (client.ObjectClient, error) { | ||
periodCfg := cfg.SchemaConfig.Configs[len(cfg.SchemaConfig.Configs)-1] // only check the last period. | ||
|
||
objClient, err := loki_storage.NewObjectClient(periodCfg.ObjectType, cfg.StorageConfig, storage.NewClientMetrics()) | ||
if err != nil { | ||
return nil, fmt.Errorf("couldn't create object client: %w", err) | ||
} | ||
|
||
return objClient, nil | ||
} | ||
|
||
func DownloadIndexFile(ctx context.Context, cfg Config, cloudIndexPath string, objClient client.ObjectClient, logger log.Logger) (string, error) { | ||
splitPath := strings.Split(cloudIndexPath, "/") | ||
localFileName := splitPath[len(splitPath)-1] | ||
decompress := indexshipper_storage.IsCompressedFile(cloudIndexPath) | ||
if decompress { | ||
// get rid of the last extension, which is .gz | ||
localFileName = strings.TrimSuffix(localFileName, path.Ext(localFileName)) | ||
} | ||
localFilePath := path.Join(cfg.WorkingDir, localFileName) | ||
if err := shipperutil.DownloadFileFromStorage(localFilePath, decompress, false, logger, func() (io.ReadCloser, error) { | ||
r, _, err := objClient.GetObject(ctx, cloudIndexPath) | ||
return r, err | ||
}); err != nil { | ||
return "", fmt.Errorf("couldn't download file %q from storage: %w", cloudIndexPath, err) | ||
} | ||
|
||
level.Info(logger).Log("msg", "file successfully downloaded from storage", "path", cloudIndexPath) | ||
return localFileName, nil | ||
} | ||
|
||
func ParseCompactexIndex(ctx context.Context, localFilePath, table string, cfg Config) (compactor.CompactedIndex, error) { | ||
periodCfg := cfg.SchemaConfig.Configs[len(cfg.SchemaConfig.Configs)-1] // only check the last period. | ||
idxCompactor := tsdb.NewIndexCompactor() | ||
compactedIdx, err := idxCompactor.OpenCompactedIndexFile(ctx, localFilePath, table, cfg.Tenant, cfg.WorkingDir, periodCfg, util_log.Logger) | ||
if err != nil { | ||
return nil, fmt.Errorf("couldn't open compacted index file %q: %w", localFilePath, err) | ||
} | ||
return compactedIdx, nil | ||
} | ||
|
||
func ValidateCompactedIndex(ctx context.Context, objClient client.ObjectClient, compactedIdx compactor.CompactedIndex, parallelism int, logger log.Logger) (int, int, error) { | ||
var missingChunks, foundChunks atomic.Int32 | ||
foundChunks.Store(0) | ||
missingChunks.Store(0) | ||
bar := progressbar.NewOptions(-1, | ||
progressbar.OptionShowCount(), | ||
progressbar.OptionSetDescription("Chunks validated"), | ||
) | ||
|
||
g, ctx := errgroup.WithContext(ctx) | ||
g.SetLimit(parallelism) | ||
compactedIdx.ForEachChunk(ctx, func(ce retention.ChunkEntry) (deleteChunk bool, err error) { //nolint:errcheck | ||
bar.Add(1) // nolint:errcheck | ||
g.Go(func() error { | ||
exists, err := CheckChunkExistance(string(ce.ChunkID), objClient) | ||
if err != nil || !exists { | ||
missingChunks.Add(1) | ||
logger.Log("msg", "chunk is missing", "err", err, "chunk_id", string(ce.ChunkID)) | ||
return nil | ||
} | ||
foundChunks.Add(1) | ||
return nil | ||
}) | ||
|
||
return false, nil | ||
}) | ||
g.Wait() // nolint:errcheck | ||
|
||
return int(foundChunks.Load()), int(missingChunks.Load()), nil | ||
} | ||
|
||
func CheckChunkExistance(key string, objClient client.ObjectClient) (bool, error) { | ||
exists, err := objClient.ObjectExists(context.Background(), key) | ||
if err != nil { | ||
return false, err | ||
} | ||
|
||
return exists, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package audit | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/go-kit/log" | ||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/grafana/loki/v3/pkg/compactor" | ||
"github.com/grafana/loki/v3/pkg/compactor/retention" | ||
"github.com/grafana/loki/v3/pkg/storage/chunk/client" | ||
) | ||
|
||
type testObjClient struct { | ||
client.ObjectClient | ||
} | ||
|
||
func (t testObjClient) ObjectExists(_ context.Context, object string) (bool, error) { | ||
if strings.Contains(object, "missing") { | ||
return false, nil | ||
} | ||
return true, nil | ||
} | ||
|
||
type testCompactedIdx struct { | ||
compactor.CompactedIndex | ||
|
||
chunks []retention.ChunkEntry | ||
} | ||
|
||
func (t testCompactedIdx) ForEachChunk(_ context.Context, f retention.ChunkEntryCallback) error { | ||
for _, chunk := range t.chunks { | ||
if _, err := f(chunk); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func TestAuditIndex(t *testing.T) { | ||
ctx := context.Background() | ||
objClient := testObjClient{} | ||
compactedIdx := testCompactedIdx{ | ||
chunks: []retention.ChunkEntry{ | ||
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-1")}}, | ||
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-2")}}, | ||
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-3")}}, | ||
{ChunkRef: retention.ChunkRef{ChunkID: []byte("found-4")}}, | ||
{ChunkRef: retention.ChunkRef{ChunkID: []byte("missing-1")}}, | ||
}, | ||
} | ||
logger := log.NewNopLogger() | ||
found, missing, err := ValidateCompactedIndex(ctx, objClient, compactedIdx, 1, logger) | ||
require.NoError(t, err) | ||
require.Equal(t, 4, found) | ||
require.Equal(t, 1, missing) | ||
} |
Oops, something went wrong.