This repository has been archived by the owner on Dec 5, 2022. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
duplicate-finder-parallel.rkt
70 lines (59 loc) · 1.97 KB
/
duplicate-finder-parallel.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#lang racket/base
(require racket/place
racket/list
racket/dict
openssl/sha1)
(provide main)
;; find-duplicates : path-string? -> hash?
;; Scans `directory` and returns an immutable hash that maps each SHA-1
;; digest to the list of paths whose contents produced that digest.
;;
;; Every real sub-directory is handed to its own place as soon as it is
;; encountered; the results are only collected after the whole directory
;; has been walked, so the sub-directories are processed in parallel with
;; the hashing done here.
;;
;; Entries that cannot be processed are skipped (with a note on stderr)
;; instead of raising: an exception raised inside a worker place would end
;; that place before it replies, leaving the parent blocked in
;; `place-channel-get`.  Symbolic links to directories are not followed, so
;; a link that points back to an ancestor cannot make the scan recurse
;; without end.
(define (find-duplicates directory)
  ;; An unreadable directory contributes nothing rather than aborting.
  (define entries
    (with-handlers ([exn:fail:filesystem?
                     (λ (e)
                       (eprintf "find-duplicates: cannot list ~a: ~a~n"
                                directory
                                (exn-message e))
                       '())])
      (directory-list directory)))
  ;; `compute-file-hash` is a chain of closures, one per entry.  Building
  ;; the chain spawns the places and hashes the files; applying it to an
  ;; empty hash afterwards merges everything in directory-list order.
  (define compute-file-hash
    (for/fold ([cfh (λ (file-hash) file-hash)])
              ([f (in-list entries)])
      (define full-path (build-path directory f))
      (cond
        [(directory-exists? full-path)
         (cond
           ;; `directory-exists?` follows links; do not descend through one.
           [(link-exists? full-path) cfh]
           [else
            (define p (place-find-duplicates-spawn full-path))
            (λ (fh)
              (combine-hash (cfh fh)
                            (place-channel-get p)))])]
        [(file-exists? full-path)
         (define stamp
           ;; I had an idea to first find out the things
           ;; that are the same size and then go back and
           ;; in parallel check each of their sha1
           ;; checksums, but just changing this line only
           ;; saved 30 ms, so it probably isn't worth it.
           (with-handlers ([exn:fail?
                            (λ (e)
                              (eprintf "find-duplicates: skipping ~a: ~a~n"
                                       full-path
                                       (exn-message e))
                              #f)])
             #;(file-size full-path)
             (call-with-input-file full-path sha1)))
         (if stamp
             (λ (fh)
               (hash-cons (cfh fh)
                          stamp
                          full-path))
             cfh)]
        ;; Dangling links and other non-file entries have nothing to hash.
        [else cfh])))
  (compute-file-hash (hash)))
;; print-duplicates : dict? -> void?
;; Writes one "duplicates:" group to the current output port for every
;; value of `file-hash` that lists more than one path.  Each path goes on
;; its own tab-indented line and every group is followed by a blank line.
(define (print-duplicates file-hash)
  (for ([paths (in-dict-values file-hash)])
    (when (< 1 (length paths))
      (printf "duplicates:~n")
      (for-each (λ (pth) (printf "\t~a~n" pth))
                paths)
      (newline))))
;; hash-cons : hash? any/c any/c -> hash?
;; Functionally updates `h` so that `v` sits at the front of the list
;; stored under `k`; a key that is not present yet starts from the empty
;; list.
(define (hash-cons h k v)
  (define (push-front existing)
    (cons v existing))
  (hash-update h k push-front null))
;; hash-append : hash? any/c list? -> hash?
;; Functionally updates `h` so that the elements of `vs` are placed in
;; front of the list stored under `k`; a key that is not present yet
;; starts from the empty list.
(define (hash-append h k vs)
  (define (prepend-all existing)
    (append vs existing))
  (hash-update h k prepend-all null))
;; hash-append* : (cons/c any/c list?) hash? -> hash?
;; Variant of `hash-append` that takes the key and its values packed as a
;; single pair and receives the hash last, which is the argument order
;; `foldl` passes to its procedure.
(define (hash-append* k*vs h)
  (let ([key (car k*vs)]
        [vals (cdr k*vs)])
    (hash-append h key vals)))
;; combine-hash : hash? (listof (cons/c any/c list?)) -> hash?
;; Merges every key/values pair of the association list `assocs` into `h`,
;; going through the pairs from first to last.
(define (combine-hash h assocs)
  (for/fold ([merged h])
            ([entry (in-list assocs)])
    (hash-append* entry merged)))
;; place-find-duplicates-spawn : place-message-allowed? -> place?
;; Starts a new place that runs `place-find-duplicates`, sends it `pth` as
;; the directory to scan, and returns the place descriptor so the caller
;; can later read the result from it.
(define (place-find-duplicates-spawn pth)
  (let ([worker (place chan (place-find-duplicates chan))])
    (place-channel-put worker pth)
    worker))
;; place-find-duplicates : place-channel? -> void?
;; Worker side of the protocol: reads the directory to scan from `ch`,
;; runs `find-duplicates` on it, and sends the resulting hash back over
;; `ch` converted to an association list.
(define (place-find-duplicates ch)
  (place-channel-put
   ch
   (hash->list (find-duplicates (place-channel-get ch)))))
;; main : [path-string?] -> void?
;; Entry point: scans `directory` in a worker place, prints every group of
;; duplicate files, and reports the elapsed time via `time`.
;;
;; `directory` is optional and defaults to the path that used to be
;; hard-coded here, so calling `(main)` behaves as before while callers can
;; now choose a different root to scan.
(define (main [directory "/home/jay/Downloads/kdict"])
  (time
   (define p (place-find-duplicates-spawn directory))
   ;; The worker replies with an association list, which
   ;; `print-duplicates` accepts because it iterates with `in-dict-values`.
   (print-duplicates (place-channel-get p))))