Skip to content

Commit

Permalink
for user test purpose
Browse files Browse the repository at this point in the history
  • Loading branch information
EdenWuyifan committed Sep 10, 2024
1 parent 35640fe commit d407f60
Show file tree
Hide file tree
Showing 47 changed files with 7,283 additions and 8,182 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.11.6 AS bdi-jupyter
FROM --platform=linux/amd64 python:3.11.6 AS bdi-jupyter

# Install JupyterHub and dependencies
RUN pip3 --disable-pip-version-check install --no-cache-dir \
Expand Down
26 changes: 26 additions & 0 deletions bdi-ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: bdiviz-demo-ingress
annotations:
kubernetes.io/ingress.class: haproxy
# The following 2 lines redirect HTTP traffic to HTTPS for you
haproxy.org/ssl-redirect: "true"
haproxy.org/ssl-redirect-code: "301"
# The following line records the user's IP address in the 'X-Forwarded-For' header
haproxy.org/forwarded-for: "true"
# Only NYU networks are allowed by default; the following annotation allows access from the whole internet
hpc.nyu.edu/access: "public"
spec:
rules:
- host: bdiviz-demo.users.hsrn.nyu.edu
http:
paths:
- path: /
pathType: Prefix
backend:
service:
# This is the name and port of your Service
name: bdiviz-demo
port:
number: 8888
19 changes: 18 additions & 1 deletion bdikit/mapping_algorithms/value_mapping/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import ast
from openai import OpenAI
from polyfuzz import PolyFuzz
from polyfuzz.models import EditDistance, TFIDF, Embeddings
from polyfuzz.models import EditDistance, TFIDF, Embeddings, RapidFuzz
from flair.embeddings import TransformerWordEmbeddings, WordEmbeddings
from rapidfuzz import fuzz
from autofj import AutoFJ
Expand Down Expand Up @@ -163,6 +163,23 @@ def __init__(
super().__init__(PolyFuzz(method), threshold)


class RapidFuzzValueMatcher(PolyFuzzValueMatcher):
    """
    Value matching algorithm based on PolyFuzz's RapidFuzz string-matching model.

    The matcher wraps a ``RapidFuzz`` model (configured with ``n_jobs=1``) in a
    ``PolyFuzz`` instance and hands it, together with the threshold, to
    ``PolyFuzzValueMatcher``.

    Args:
        threshold: Forwarded unchanged to ``PolyFuzzValueMatcher.__init__``.
            Defaults to ``VALUE_MATCHING_THRESHOLD``.
        top_n: Accepted but currently not used by this matcher.
        cosine_method: Accepted but currently not used by this matcher.
    """

    def __init__(
        self,
        threshold: float = VALUE_MATCHING_THRESHOLD,
        top_n: int = 1,
        cosine_method: str = "sparse",
    ):
        # NOTE(review): `top_n` and `cosine_method` are not forwarded to the
        # RapidFuzz model; presumably they are kept only so the signature
        # mirrors the other PolyFuzz-based matchers -- confirm before removing.
        method = RapidFuzz(
            n_jobs=1,
        )
        super().__init__(PolyFuzz(method), threshold)


class GPTValueMatcher(BaseValueMatcher):
def __init__(
self,
Expand Down
Loading

0 comments on commit d407f60

Please sign in to comment.