Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LRU cache for curie lookup #61

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions medikanren/common.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,13 @@
(map (lambda (name)
(define path (path/data (symbol->string name)))
(define options (list (config-ref 'in-memory-names?)
(config-ref 'in-memory-cuis?)))
(config-ref 'in-memory-cuis?)
(config-ref 'num-cached-cuis)))
(cond ((directory-exists? path)
(when verbose? (printf "loading ~a\n" name))
(cons name (if verbose?
(time (apply make-db path options))
(apply make-db path options))))
(time (apply make-db (cons path options)))
(apply make-db (cons path options)))))
Comment on lines +136 to +137
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there an issue using the flat form of apply?

(else (when verbose?
(printf "cannot load ~a; " name)
(printf "directory missing: ~a\n" path))
Expand Down
1 change: 1 addition & 0 deletions medikanren/config.defaults.scm
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

(in-memory-names? . #t)
(in-memory-cuis? . #t)
(num-cached-cuis . #f)

(query-results.write-to-file? . #t) ;; #t will write the query and results to file, #f will not
(query-results.file-name . "last.sx")
Expand Down
13 changes: 11 additions & 2 deletions medikanren/db.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
(require
"repr.rkt"
"string-search.rkt"
"lru.rkt"
racket/file
racket/stream
racket/string
Expand Down Expand Up @@ -78,7 +79,7 @@
;; category*, predicate*

;; memory-usage: 0 1 2 3
(define (make-db db-dir (in-memory-names? #t) (in-memory-cuis? #t))
(define (make-db db-dir (in-memory-names? #t) (in-memory-cuis? #t) (num-cached-cuis #f))
(define (db-path fname) (expand-user-path (build-path db-dir fname)))
(define (open-db-path fname) (open-input-file (db-path fname)))
(define (open-db-path/optional fname)
Expand Down Expand Up @@ -139,8 +140,16 @@
(define cui-index (port->string-keys in-concept-cui-index))
(close-input-port in-concept-cui-index)
(lambda (cui*) (string:corpus-find* cui-corpus cui-index cui*)))
(num-cached-cuis
(define lru
(make-lru
(lambda (cui) (string:corpus-find/disk cid->concept in-concept-cui-index cui))
#:num-entries-max num-cached-cuis))
(define (lookup cui) (lru-ref lru cui))
(lambda (cui*) (string:corpus-find*/disk cid->concept lookup cui*)))
(else
(lambda (cui*) (string:corpus-find*/disk cid->concept in-concept-cui-index cui*)))))
(define (lookup cui) (string:corpus-find/disk cid->concept in-concept-cui-index cui))
(lambda (cui*) (string:corpus-find*/disk cid->concept lookup cui*)))))

(define ~name*->cid*
(cond (in-memory-names?
Expand Down
234 changes: 234 additions & 0 deletions medikanren/lru.rkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
#lang racket
(provide
make-lru
lru-ref
)
(require chk)


(define (assert k st)
(if (not k)
(raise (format "assertion failure: ~a" st))
#f))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dead code.

Otherwise, use when since the result won't be useful.


(struct payload (k v))

(struct lrun (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the n in lrun mean? n for node in a doubly-linked list?

(older #:mutable)
payload
(newer #:mutable)))

(struct lru (
behind-ref
hash
(lrun-oldest #:mutable)
(lrun-newest #:mutable)
(num-entries #:mutable)
num-entries-max))

(define (make-lru behind-ref #:num-entries-max (num-entries-max 1000))
(lru
behind-ref
(make-hash)
#f
#f
0
num-entries-max))

;;; Add a newest entry to the lru.
(define (lru-put-newest ths payload1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lru-put-newest!

(let* (
(lrun1 (lru-lrun-newest ths))
(k (payload-k payload1))
(lrun0 (lrun lrun1 payload1 #f))
)
; connect lrun fields
; lrun0.older is already connected
(set-lrun-older! lrun0 lrun1)
(if lrun1
(set-lrun-newer! lrun1 lrun0)
#f)
gregr marked this conversation as resolved.
Show resolved Hide resolved
; connect lru fields
(set-lru-lrun-newest! ths lrun0)
(if (not (lru-lrun-oldest ths)) ; are we brand new?
(set-lru-lrun-oldest! ths lrun0)
#f)
gregr marked this conversation as resolved.
Show resolved Hide resolved
(set-lru-num-entries! ths (+ (lru-num-entries ths) 1))
(hash-set! (lru-hash ths) k lrun0)))

(define (lru-remove ths lrun1)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lru-remove!

(let* (
(payload1 (lrun-payload lrun1))
(k (payload-k payload1))
(lrun0 (lrun-older lrun1))
(lrun2 (lrun-newer lrun1))
)
(if lrun0
(set-lrun-newer! lrun0 lrun2)
(begin
; we are removing the oldest
(set-lru-lrun-oldest! ths lrun2)
(set-lrun-older! lrun2 #f)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lrun2 may be #f here. Line 73 already takes care of this though, so it should be fine to delete this line.

(if lrun2
(set-lrun-older! lrun2 lrun0)
(begin
; we are removing the newest
(set-lru-lrun-newest! ths lrun0)
(set-lrun-newer! lrun0 #f)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue with lrun0 possibly being #f here. See the other comment.

(set-lru-num-entries! ths (- (lru-num-entries ths) 1))
(hash-remove! (lru-hash ths) k)))


;;; If the lru is full, remove the oldest entry.
(define (lru-evict ths)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lru-evict!

(if (> (lru-num-entries ths) (lru-num-entries-max ths))
(let* ((lrun1 (lru-lrun-oldest ths)))
(lru-remove ths lrun1))
#f))
gregr marked this conversation as resolved.
Show resolved Hide resolved

;;; Make the entry with key k the newest entry.
(define (lru-freshen ths k)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lru-freshen!

(let* (
(lrun1 (hash-ref (lru-hash ths) k))
(payload1 (lrun-payload lrun1))
)
(lru-remove ths lrun1)
(lru-put-newest ths payload1)))


;;; Fetch item from lru cache, or if absent, from ref-behind.
(define (lru-ref ths k)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lru-ref!

(match (hash-ref! (lru-hash ths) k #f)
(#f
(let ((v ((lru-behind-ref ths) k)))
(lru-put-newest ths (payload k v))
(lru-evict ths)
v))
(lrun
(if (>= (lru-num-entries ths) 2) ; freshen 1 entry is noop
(lru-freshen ths k)
#f)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might make sense to put this short-circuit case into lru-freshen itself so its callers don't have to think about it.

(payload-v (lrun-payload lrun)))))



(module+ test
(define (make-test-hash n)
(define h (make-hash))
(for ((i (range n)))
(hash-set! h i (+ i 100)))
h)

;; do we get a correct value back on a miss?
(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (define v (lru-ref l 5))
#:= v 105)

;; do we get a correct value back on a hit?
(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:do (define v (lru-ref l 5))
#:= v 105)

;; do we initialize properly?
(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (define v (lru-ref l 5))
#:t (lru-lrun-newest l)
#:t (lru-lrun-oldest l))

;; when the oldest entry becomes the newest entry, is the next eviction correct?
(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:do (lru-ref l 7)
#:do (lru-ref l 5)
#:do (lru-ref l 8)
#:t (not (hash-has-key? (lru-hash l) 6)))

;; does num-entries grow as expeced?
(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:= (lru-num-entries l) 1)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:= (lru-num-entries l) 2)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:do (lru-ref l 7)
#:= (lru-num-entries l) 3)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:do (lru-ref l 7)
#:do (lru-ref l 8)
#:= (lru-num-entries l) 3)

;; do we make the expected number of upstream calls?

(chk
#:do (define h1 (make-test-hash 100))
#:do (define num-calls (box 0))
#:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls))))
#:do (define l (make-lru
(lambda (k) (incr) (hash-ref h1 k #f))
#:num-entries-max 2))
#:do (lru-ref l 5)
#:= (unbox num-calls) 1)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define num-calls (box 0))
#:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls))))
#:do (define l (make-lru
(lambda (k) (incr) (hash-ref h1 k #f))
#:num-entries-max 2))
#:do (lru-ref l 5)
#:do (lru-ref l 5)
#:= (unbox num-calls) 1)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define num-calls (box 0))
#:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls))))
#:do (define l (make-lru
(lambda (k) (incr) (hash-ref h1 k #f))
#:num-entries-max 2))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:do (lru-ref l 5)
#:= (unbox num-calls) 2)

(chk
#:do (define h1 (make-test-hash 100))
#:do (define num-calls (box 0))
#:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls))))
#:do (define l (make-lru
(lambda (k) (incr) (hash-ref h1 k #f))
#:num-entries-max 2))
#:do (lru-ref l 5)
#:do (lru-ref l 6)
#:do (lru-ref l 7)
#:do (lru-ref l 5)
#:= (unbox num-calls) 4)
)
6 changes: 4 additions & 2 deletions medikanren/string-search.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
suffix:corpus-find*/disk
string:corpus->index
string:corpus-find*
string:corpus-find/disk
string:corpus-find*/disk)
(require
"repr.rkt"
Expand Down Expand Up @@ -328,7 +329,8 @@
(range rstart rend))))))
(else '()))))

(define (string:corpus-find*/disk cid->concept in-index str*)

(define (string:corpus-find*/disk cid->concept lookup str*)
(remove-duplicates
(sort (append* (map (lambda (s) (string:corpus-find/disk cid->concept in-index s)) str*))
(sort (append* (map lookup str*))
<)))