Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Add Randomized SVD in PCA] #300

Merged
merged 32 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f745ea0
add rsvd in pca.py
tarantula-leo Aug 11, 2023
8c82a21
add rsvd in pca.py
tarantula-leo Aug 11, 2023
1df1a89
add rsvd in pca_test.py
tarantula-leo Aug 11, 2023
1bbc95d
add rsvd in pca_emul.py
tarantula-leo Aug 11, 2023
464c442
Update BUILD.bazel
tarantula-leo Aug 11, 2023
fb93b55
Update pca.py
tarantula-leo Aug 14, 2023
812339e
Update pca_test.py
tarantula-leo Aug 14, 2023
9d77508
Update pca_emul.py
tarantula-leo Aug 14, 2023
74ced49
Update pca.py
tarantula-leo Aug 14, 2023
afb1adc
Update pca.py
tarantula-leo Aug 14, 2023
8603909
Update pca_test.py
tarantula-leo Aug 14, 2023
f710e62
Update extmath.py
tarantula-leo Aug 14, 2023
5e95f20
Update extmath.py
tarantula-leo Aug 14, 2023
f825820
Update BUILD.bazel
tarantula-leo Aug 14, 2023
f10374e
Update pca_emul.py
tarantula-leo Aug 14, 2023
4620a62
Update pca_emul.py
tarantula-leo Aug 14, 2023
dfe39a6
Update BUILD.bazel
tarantula-leo Aug 14, 2023
c6899e5
Update BUILD.bazel
tarantula-leo Aug 14, 2023
2f91851
Update pca_test.py
tarantula-leo Aug 14, 2023
7e0dbb6
Update pca.py
tarantula-leo Aug 14, 2023
2978586
Update pca_emul.py
tarantula-leo Aug 14, 2023
185df27
Create 3pc.json
tarantula-leo Aug 14, 2023
bca7bc1
Update pca_emul.py
tarantula-leo Aug 14, 2023
4e8f5a4
Update pca_emul.py
tarantula-leo Aug 14, 2023
25c48f2
Create 3pc.json
tarantula-leo Aug 15, 2023
ad60011
Update BUILD.bazel
tarantula-leo Aug 15, 2023
90d0545
Create rsvd_pca_emul.py
tarantula-leo Aug 15, 2023
eddfbac
Update pca_emul.py
tarantula-leo Aug 15, 2023
9f891f6
Update extmath.py
tarantula-leo Aug 15, 2023
ad2b06a
Delete 3pc.json
tarantula-leo Aug 15, 2023
b23929b
Update BUILD.bazel
tarantula-leo Aug 15, 2023
b7113c7
Update BUILD.bazel
tarantula-leo Aug 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sml/decomposition/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ package(default_visibility = ["//visibility:public"])
# PCA library target built from pca.py; it depends on //sml/utils:extmath.
py_library(
    name = "pca",
    srcs = ["pca.py"],
    deps = ["//sml/utils:extmath"],
)
52 changes: 52 additions & 0 deletions sml/decomposition/emulations/3pc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"id": "outsourcing.3pc",
"nodes": {
"node:0": "127.0.0.1:9920",
"node:1": "127.0.0.1:9921",
"node:2": "127.0.0.1:9922",
"node:3": "127.0.0.1:9923",
"node:4": "127.0.0.1:9924"
},
"devices": {
"SPU": {
"kind": "SPU",
"config": {
"node_ids": [
"node:0",
"node:1",
"node:2"
],
"spu_internal_addrs": [
"127.0.0.1:9930",
"127.0.0.1:9931",
"127.0.0.1:9932"
],
"experimental_data_folder": [
"/tmp/spu_data_0/",
"/tmp/spu_data_1/",
"/tmp/spu_data_2/"
],
"runtime_config": {
"protocol": "ABY3",
"field": "FM128",
"fxp_fraction_bits": 30,
"enable_pphlo_profile": true,
"enable_hal_profile": true,
"enable_pphlo_trace": false
}
}
},
"P1": {
"kind": "PYU",
"config": {
"node_id": "node:3"
}
},
"P2": {
"kind": "PYU",
"config": {
"node_id": "node:4"
}
}
}
}
17 changes: 17 additions & 0 deletions sml/decomposition/emulations/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,20 @@ py_binary(
"//sml/utils:emulation",
],
)

# Emulation binary for the rSVD-based PCA (rsvd_pca_emul.py).
# The ":conf" filegroup is attached as runtime data so the cluster
# description JSON is available to the script when it runs.
py_binary(
    name = "rsvd_pca_emul",
    srcs = ["rsvd_pca_emul.py"],
    data = [":conf"],
    deps = [
        "//sml/decomposition:pca",
        "//sml/utils:emulation",
    ],
)

# Groups the emulation cluster configuration (3pc.json) so binaries in this
# package can reference it through a single ":conf" data dependency.
filegroup(
    name = "conf",
    srcs = [
        "3pc.json",
    ],
)
6 changes: 3 additions & 3 deletions sml/decomposition/emulations/pca_emul.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
from sklearn.decomposition import PCA as SklearnPCA

# Add the library directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../'))
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../'))

import sml.utils.emulation as emulation
from sml.decomposition.pca import PCA


def emul_PCA(mode: emulation.Mode.MULTIPROCESS):
def emul_powerPCA(mode: emulation.Mode.MULTIPROCESS):
def proc(X):
model = PCA(
method='power_iteration',
Expand Down Expand Up @@ -94,4 +94,4 @@ def proc_reconstruct(X):


if __name__ == "__main__":
emul_PCA(emulation.Mode.MULTIPROCESS)
emul_powerPCA(emulation.Mode.MULTIPROCESS)
107 changes: 107 additions & 0 deletions sml/decomposition/emulations/rsvd_pca_emul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2023 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

import jax.numpy as jnp
import jax.random as random
import numpy as np
from sklearn.decomposition import PCA as SklearnPCA

# Add the library directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../'))

import sml.utils.emulation as emulation
from sml.decomposition.pca import PCA


def emul_rsvdPCA(mode: emulation.Mode = emulation.Mode.MULTIPROCESS):
    """Emulate the rSVD-based PCA on SPU and compare it with scikit-learn.

    The function brings up an emulator from the ``3pc.json`` cluster
    description, seals a random ``(1000, 20)`` dataset together with a random
    projection matrix, fits ``PCA(method='rsvd')`` inside the emulator, and
    then checks the result against ``sklearn.decomposition.PCA`` configured
    with the randomized solver.

    Args:
        mode: Emulation mode handed to ``emulation.Emulator``. Defaults to
            ``emulation.Mode.MULTIPROCESS``.

    Raises:
        AssertionError: If the transformed data does not have
            ``n_components`` columns, its column means are not close to zero,
            or the reconstruction deviates from scikit-learn's by more than
            the absolute tolerance of ``1e-1``.
    """
    print("emul rsvdPCA.")

    def proc(X, random_matrix, n_components, n_oversamples, max_power_iter, scale):
        """Fit the rSVD PCA on ``X`` and return its derived outputs.

        Returns:
            A tuple ``(X_transformed, X_variances, X_reconstructed)``.
        """
        model = PCA(
            method='rsvd',
            n_components=n_components,
            n_oversamples=n_oversamples,
            random_matrix=random_matrix,
            max_power_iter=max_power_iter,
            scale=scale,
        )

        model.fit(X)
        X_transformed = model.transform(X)
        X_variances = model._variances
        X_reconstructed = model.inverse_transform(X_transformed)

        return X_transformed, X_variances, X_reconstructed

    # NOTE(review): the path is relative, so it presumably has to be resolved
    # from the repository root (or the Bazel runfiles root) -- confirm.
    conf_path = "sml/decomposition/emulations/3pc.json"

    # The emulator is created *before* entering the ``try`` block: if the
    # constructor raises, there is nothing to tear down, and the ``finally``
    # clause must not touch an unbound ``emulator`` (which would replace the
    # real error with a NameError).
    # bandwidth and latency only work for docker mode
    emulator = emulation.Emulator(conf_path, mode, bandwidth=300, latency=20)

    try:
        # ``up()`` stays inside the ``try`` so a partially started emulator is
        # still shut down by the ``finally`` clause.
        emulator.up()

        # Create a simple dataset
        X = random.normal(random.PRNGKey(0), (1000, 20))
        X_spu = emulator.seal(X)
        n_components = 5
        n_oversamples = 10
        max_power_iter = 300
        # NOTE(review): the meaning of the two ``scale`` entries is defined by
        # ``PCA``; it is forwarded unchanged here.
        scale = (10000000, 10000)

        # Create random_matrix
        random_state = np.random.RandomState(0)
        random_matrix = random_state.normal(
            size=(X.shape[1], n_components + n_oversamples)
        )
        random_matrix_spu = emulator.seal(random_matrix)

        # The scalar hyper-parameters (positions 2-5) are passed as static
        # arguments; only the data and the random matrix are sealed inputs.
        result = emulator.run(proc, static_argnums=(2, 3, 4, 5))(
            X_spu, random_matrix_spu, n_components, n_oversamples, max_power_iter, scale
        )
        print("X_transformed_spu: ", result[0][:5, :])
        print("X_variance_spu: ", result[1])
        print("X_reconstructed_spu:", result[2][:5, :])

        # The transformed data should have n_components columns
        assert result[0].shape[1] == n_components

        # The mean of the transformed data should be approximately 0
        assert jnp.allclose(jnp.mean(result[0], axis=0), 0, atol=1e-3)

        # Compare with sklearn
        model = SklearnPCA(
            n_components=n_components,
            svd_solver="randomized",
            power_iteration_normalizer="QR",
            random_state=0,
        )
        model.fit(X)
        X_transformed = model.transform(X)
        X_variances = model.explained_variance_
        X_reconstructed = model.inverse_transform(X_transformed)

        print("X_transformed_sklearn: ", X_transformed[:5, :])
        print("X_variances_sklearn: ", X_variances)
        print("X_reconstructed_sklearn: ", X_reconstructed[:5, :])

        # Only the reconstruction is compared numerically against sklearn.
        assert np.allclose(X_reconstructed, result[2], atol=1e-1)

    finally:
        emulator.down()


if __name__ == "__main__":
    # Script entry point: run the rSVD PCA emulation in multi-process mode.
    emul_rsvdPCA(mode=emulation.Mode.MULTIPROCESS)
Loading
Loading