forked from projectdiscovery/alterx
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dedupe.go
61 lines (54 loc) · 1.39 KB
/
dedupe.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package alterx
import "github.com/projectdiscovery/alterx/internal/dedupe"
// MaxInMemoryDedupeSize is the threshold that NewDedupe compares its byteLen
// argument against when choosing a backend: values at or below it select the
// in-memory map backend, larger values select the LevelDB backend.
// The default is 100 * 1024 * 1024 (100 MB). It is a variable, so callers
// can adjust it before calling NewDedupe.
var MaxInMemoryDedupeSize = 100 * 1024 * 1024
// DedupeBackend is the storage abstraction used by Dedupe: elements are
// written with Upsert, read back with IterCallback, and any leftovers are
// released with Cleanup.
type DedupeBackend interface {
// Upsert adds elem to the backend/database, or updates it if it is
// already present.
Upsert(elem string)
// IterCallback executes the given callback on each element while
// iterating over the backend.
IterCallback(callback func(elem string))
// Cleanup cleans up any residuals left after deduping.
Cleanup()
}
// Dedupe is a string deduplication type. It reads strings from a channel,
// stores them in a DedupeBackend (see Drain), and streams the stored
// elements back out (see GetResults).
type Dedupe struct {
// receive is the input channel that Drain reads from until it is closed.
receive <-chan string
// backend is the storage that Drain upserts into and that GetResults
// iterates over and finally cleans up.
backend DedupeBackend
}
// Drain consumes the receive channel until it is closed, upserting every
// value it reads into the backend. It blocks the caller for as long as the
// channel stays open.
func (d *Dedupe) Drain() {
	for item := range d.receive {
		d.backend.Upsert(item)
	}
}
// GetResults walks the dedupe storage in a background goroutine and streams
// every element onto the returned channel (buffered to 100 entries).
// Once iteration finishes the backend is cleaned up and the channel is
// closed. Sends block while the buffer is full, so the caller should read
// the channel until it is closed.
func (d *Dedupe) GetResults() <-chan string {
	out := make(chan string, 100)

	// forward pushes a single stored element to the consumer.
	forward := func(item string) {
		out <- item
	}

	go func() {
		// Closing last (after Cleanup) signals the consumer that
		// iteration and cleanup are both complete.
		defer close(out)
		d.backend.IterCallback(forward)
		d.backend.Cleanup()
	}()

	return out
}
// NewDedupe returns a Dedupe instance that removes all duplicates from the
// strings received on ch.
//
// byteLen is compared against MaxInMemoryDedupeSize to pick the storage:
// at or below the limit the in-memory map backend is used, above it the
// LevelDB backend is used.
//
// Note: if byteLen is not correct/specified, alterx may consume a lot of
// memory, because the in-memory backend is then selected.
func NewDedupe(ch <-chan string, byteLen int) *Dedupe {
	var store DedupeBackend
	switch {
	case byteLen > MaxInMemoryDedupeSize:
		// gologger print a info message here
		store = dedupe.NewLevelDBBackend()
	default:
		store = dedupe.NewMapBackend()
	}
	return &Dedupe{
		receive: ch,
		backend: store,
	}
}