Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ Options:
-port listen port for flame graph web service
-sep separator for flamegraph, rdb will separate key by it, default value is ":".
supporting multi separators: -sep sep1 -sep sep2
-prefix-sep separator for prefix analysis (flat-map mode, constant memory).
when specified, uses separator-based analysis instead of radix tree.
supporting multi separators: -prefix-sep sep1 -prefix-sep sep2
-regex using regex expression filter keys
-expire filter keys by its expiration time
1. '1751731200~1751817600' get keys with expiration time in range [1751731200, 1751817600]
Expand Down Expand Up @@ -86,7 +89,9 @@ parameters between '[' and ']' is optional
rdb -c bigkey [-o dump.aof] [-n 10] dump.rdb
5. get number and memory size by prefix
rdb -c prefix [-n 10] [-max-depth 3] [-o prefix-report.csv] dump.rdb
6. draw flamegraph
6. get number and memory size by prefix with separator (constant memory)
rdb -c prefix [-n 10] [-max-depth 3] -prefix-sep : [-o prefix-report.csv] dump.rdb
7. draw flamegraph
rdb -c flamegraph [-port 16379] [-sep :] dump.rdb
```

Expand Down Expand Up @@ -417,6 +422,22 @@ Example:
rdb -c prefix -n 10 -max-depth 2 -o prefix.csv cases/memory.rdb
```

## Separator-based Prefix Analysis (Constant Memory)

When you specify `-prefix-sep`, RDB uses a flat map instead of a radix tree for prefix analysis. In this mode memory usage grows with the number of distinct prefixes rather than with the number of keys, which makes it suitable for very large RDB files, especially when combined with `-max-depth`.

You need to specify the separator(s) used in your key naming convention. Multiple separators are supported and will be normalized to the first one.

```bash
rdb -c prefix -prefix-sep : -n 10 -max-depth 2 -o prefix.csv dump.rdb
```

With multiple separators (e.g., keys use both `:` and `.`):

```bash
rdb -c prefix -prefix-sep : -prefix-sep . -n 10 -o prefix.csv dump.rdb
```

# Flame Graph

In many cases it is not a few very large keys but a large number of small keys that occupy most of the memory.
Expand Down
23 changes: 22 additions & 1 deletion README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Options:
-port listen port for flame graph web service
-sep separator for flamegraph, rdb will separate key by it, default value is ":".
supporting multi separators: -sep sep1 -sep sep2
-prefix-sep separator for prefix analysis (flat-map mode, constant memory).
when specified, uses separator-based analysis instead of radix tree.
supporting multi separators: -prefix-sep sep1 -prefix-sep sep2
-regex using regex expression filter keys
-expire filter keys by its expiration time
1. '1751731200~1751817600' get keys with expiration time in range [1751731200, 1751817600]
Expand Down Expand Up @@ -72,7 +75,9 @@ parameters between '[' and ']' is optional
rdb -c bigkey [-o dump.aof] [-n 10] dump.rdb
5. get number and memory size by prefix
rdb -c prefix [-n 10] [-max-depth 3] [-o prefix-report.csv] dump.rdb
6. draw flamegraph
6. get number and memory size by prefix with separator (constant memory)
rdb -c prefix [-n 10] [-max-depth 3] -prefix-sep : [-o prefix-report.csv] dump.rdb
7. draw flamegraph
rdb -c flamegraph [-port 16379] [-sep :] dump.rdb
```

Expand Down Expand Up @@ -343,6 +348,22 @@ Example:
rdb -c prefix -n 10 -max-depth 2 -o prefix.csv cases/memory.rdb
```

## 基于分隔符的前缀分析(恒定内存)

当指定 `-prefix-sep` 参数时,RDB 将使用 flat map 代替 radix tree 进行前缀分析。这种模式的内存占用取决于不同前缀的数量,而不是 key 的数量,适合分析超大 RDB 文件,建议配合 `-max-depth` 使用。

您需要指定 key 命名规范中使用的分隔符。支持多个分隔符,所有分隔符会被归一化为第一个。

```bash
rdb -c prefix -prefix-sep : -n 10 -max-depth 2 -o prefix.csv dump.rdb
```

使用多个分隔符(例如 key 中同时使用 `:` 和 `.`):

```bash
rdb -c prefix -prefix-sep : -prefix-sep . -n 10 -o prefix.csv dump.rdb
```


# 火焰图

Expand Down
15 changes: 13 additions & 2 deletions cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Options:
-port listen port for flame graph web service
-sep separator for flamegraph, rdb will separate key by it, default value is ":".
supporting multi separators: -sep sep1 -sep sep2
-prefix-sep separator for prefix analysis (flat-map mode, constant memory).
when specified, uses separator-based analysis instead of radix tree.
supporting multi separators: -prefix-sep sep1 -prefix-sep sep2
-regex using regex expression filter keys
-expire filter keys by its expiration time
1. '1751731200~1751817600' get keys with expiration time in range [1751731200, 1751817600]
Expand Down Expand Up @@ -45,7 +48,9 @@ parameters between '[' and ']' is optional
rdb -c bigkey [-o dump.aof] [-n 10] dump.rdb
5. get number and memory size by prefix
rdb -c prefix [-n 10] [-max-depth 3] [-o prefix-report.csv] dump.rdb
6. draw flamegraph
6. get number and memory size by prefix with separator (constant memory)
rdb -c prefix [-n 10] [-max-depth 3] -prefix-sep : [-o prefix-report.csv] dump.rdb
7. draw flamegraph
rdb -c flamegraph [-port 16379] [-sep :] dump.rdb
`

Expand Down Expand Up @@ -74,6 +79,7 @@ func main() {
var maxDepth int
var concurrent int
var showGlobalMeta bool
var prefixSeps separators
var err error
flagSet.StringVar(&cmd, "c", "", "command for rdb: json")
flagSet.StringVar(&output, "o", "", "output file path")
Expand All @@ -86,6 +92,7 @@ func main() {
flagSet.StringVar(&expirationExpr, "expire", "", "expiration filter expression")
flagSet.StringVar(&sizeExpr, "size", "", "size filter expression")
flagSet.BoolVar(&noExpired, "no-expired", false, "filter expired keys(deprecated, please use expire)")
flagSet.Var(&prefixSeps, "prefix-sep", "separator for prefix analysis (flat-map mode, constant memory)")
flagSet.BoolVar(&showGlobalMeta, "show-global-meta", false, "Show global meta likes redis-verion/ctime/functions")
_ = flagSet.Parse(os.Args[1:]) // ExitOnError
src := flagSet.Arg(0)
Expand Down Expand Up @@ -142,7 +149,11 @@ func main() {
case "bigkey":
err = helper.FindBiggestKeys(src, n, outputFile, options...)
case "prefix":
err = helper.PrefixAnalyse(src, n, maxDepth, outputFile, options...)
if len(prefixSeps) > 0 {
err = helper.SepPrefixAnalyse(src, n, maxDepth, prefixSeps, outputFile, options...)
} else {
err = helper.PrefixAnalyse(src, n, maxDepth, outputFile, options...)
}
case "flamegraph":
_, err = helper.FlameGraph(src, port, seps, options...)
if err != nil {
Expand Down
5 changes: 5 additions & 0 deletions cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ func TestCmd(t *testing.T) {
if f, _ := os.Stat("tmp/tree.csv"); f == nil {
t.Error("command prefix failed")
}
os.Args = []string{"", "-c", "prefix", "-prefix-sep", ":", "-o", "tmp/tree_sep.csv", "cases/tree.rdb"}
main()
if f, _ := os.Stat("tmp/tree_sep.csv"); f == nil {
t.Error("command prefix with prefix-sep failed")
}

// test error command line
os.Args = []string{"", "-c", "json", "-o", "tmp/output", "/none/a"}
Expand Down
12 changes: 8 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
module github.com/hdt3213/rdb

go 1.16
go 1.18

require github.com/bytedance/sonic v1.15.0

require (
github.com/bytedance/sonic v1.12.1
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/klauspost/cpuid/v2 v2.2.8 // indirect
github.com/bytedance/gopkg v0.1.3 // indirect
github.com/bytedance/sonic/loader v0.5.0 // indirect
github.com/cloudwego/base64x v0.1.6 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
golang.org/x/arch v0.9.0 // indirect
golang.org/x/sys v0.24.0 // indirect
)
42 changes: 14 additions & 28 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,48 +1,34 @@
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.8.7 h1:d3sry5vGgVq/OpgozRUNP6xBsSo0mtNdwliApw+SAMQ=
github.com/bytedance/sonic v1.8.7/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
github.com/bytedance/sonic v1.12.1 h1:jWl5Qz1fy7X1ioY74WqO0KjAMtAGQs4sYnjiEBiyX24=
github.com/bytedance/sonic v1.12.1/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM=
github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM=
github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.9.0 h1:ub9TgUInamJ8mrZIGlBG6/4TqWeMszd4N8lNorbrr6k=
golang.org/x/arch v0.9.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
143 changes: 143 additions & 0 deletions helper/prefix_stream.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package helper

import (
"encoding/csv"
"errors"
"fmt"
"math"
"os"
"sort"
"strconv"
"strings"

"github.com/hdt3213/rdb/bytefmt"
"github.com/hdt3213/rdb/core"
"github.com/hdt3213/rdb/model"
)

// prefixStats accumulates the running totals for a single key prefix.
type prefixStats struct {
	keyCount int // number of keys counted under the prefix
	size     int // sum of the sizes reported for those keys
}

// SepPrefixAnalyse reads an RDB file, aggregates size and key count by key
// prefix, and writes the result to output as CSV with the header
// "database,prefix,size,size_readable,key_count".
//
// Every separator in separators is first rewritten to separators[0]. The key
// is then split on that separator, and each leading group of 1..maxDepth
// segments is counted under the prefix "<segments><sep>*". The full key is
// never reported as a prefix, so a key that contains no separator contributes
// nothing. Aggregation uses a flat map keyed by (database, prefix), so memory
// grows with the number of distinct prefixes rather than with the number of
// keys.
//
// topN limits the number of rows written and maxDepth limits the prefix depth;
// a value <= 0 for either means "no limit". options are forwarded to
// wrapDecoder.
//
// Rows are ordered by size descending; ties are broken by database index and
// then by prefix so that the output is deterministic.
//
// An error is returned when rdbFilename is empty, separators is empty or
// contains an empty string, output is nil, the file cannot be opened or
// parsed, or the CSV cannot be written.
func SepPrefixAnalyse(rdbFilename string, topN int, maxDepth int, separators []string, output *os.File, options ...interface{}) error {
	if rdbFilename == "" {
		return errors.New("src file path is required")
	}
	if len(separators) == 0 {
		return errors.New("at least one separator is required")
	}
	for _, sep := range separators {
		// An empty separator would make strings.SplitN split between every
		// character and strings.ReplaceAll inject the primary separator
		// between every rune, which is never what the caller wants.
		if sep == "" {
			return errors.New("separator must not be empty")
		}
	}
	if output == nil {
		return errors.New("output file is required")
	}
	if topN <= 0 {
		topN = math.MaxInt
	}

	// strings.SplitN treats a negative count as "no limit". Computing
	// maxDepth+1 only for a bounded depth avoids the integer overflow that
	// math.MaxInt+1 would cause.
	splitCount := -1
	if maxDepth > 0 && maxDepth < math.MaxInt {
		splitCount = maxDepth + 1
	}

	rdbFile, err := os.Open(rdbFilename)
	if err != nil {
		return fmt.Errorf("open rdb %s failed, %w", rdbFilename, err)
	}
	defer rdbFile.Close()

	var dec decoder = core.NewDecoder(rdbFile)
	if dec, err = wrapDecoder(dec, options...); err != nil {
		return err
	}

	primarySep := separators[0]

	// Flat map from (database, prefix) to the accumulated statistics.
	type groupKey struct {
		db     int
		prefix string
	}
	groups := make(map[groupKey]*prefixStats)

	err = dec.Parse(func(object model.RedisObject) bool {
		db := object.GetDBIndex()
		size := object.GetSize()

		// Normalize all secondary separators to the primary one.
		key := object.GetKey()
		for _, sep := range separators[1:] {
			key = strings.ReplaceAll(key, sep, primarySep)
		}

		// Because of splitCount there are at most maxDepth+1 parts; the last
		// part is the remainder of the key and is not a prefix, so only
		// depths 1..len(parts)-1 are emitted.
		parts := strings.SplitN(key, primarySep, splitCount)
		for depth := 1; depth < len(parts); depth++ {
			gk := groupKey{
				db:     db,
				prefix: strings.Join(parts[:depth], primarySep) + primarySep + "*",
			}
			stats := groups[gk]
			if stats == nil {
				stats = &prefixStats{}
				groups[gk] = stats
			}
			stats.size += size
			stats.keyCount++
		}
		return true
	})
	if err != nil {
		return err
	}

	type entry struct {
		db       int
		prefix   string
		size     int
		keyCount int
	}
	entries := make([]entry, 0, len(groups))
	for gk, stats := range groups {
		entries = append(entries, entry{
			db:       gk.db,
			prefix:   gk.prefix,
			size:     stats.size,
			keyCount: stats.keyCount,
		})
	}
	// Map iteration order is random and sort.Slice is not stable, so explicit
	// tie-breakers are needed to get reproducible output.
	sort.Slice(entries, func(i, j int) bool {
		a, b := entries[i], entries[j]
		if a.size != b.size {
			return a.size > b.size
		}
		if a.db != b.db {
			return a.db < b.db
		}
		return a.prefix < b.prefix
	})

	// Write the CSV header followed by the top rows.
	if _, err = output.WriteString("database,prefix,size,size_readable,key_count\n"); err != nil {
		return fmt.Errorf("write header failed: %w", err)
	}
	csvWriter := csv.NewWriter(output)

	if topN > len(entries) {
		topN = len(entries)
	}
	for _, e := range entries[:topN] {
		err = csvWriter.Write([]string{
			strconv.Itoa(e.db),
			e.prefix,
			strconv.Itoa(e.size),
			bytefmt.FormatSize(uint64(e.size)),
			strconv.Itoa(e.keyCount),
		})
		if err != nil {
			return fmt.Errorf("csv write failed: %w", err)
		}
	}

	// csv.Writer buffers its output; errors from the underlying writer only
	// surface through Error() after Flush, so they must be checked here.
	csvWriter.Flush()
	if err = csvWriter.Error(); err != nil {
		return fmt.Errorf("csv flush failed: %w", err)
	}
	return nil
}
Loading
Loading