Skip to content

Commit

Permalink
Merge branch 'master' into clean-complete
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Jul 15, 2022
2 parents 4d142eb + fa07dc6 commit 1ac0f0e
Show file tree
Hide file tree
Showing 776 changed files with 98,094 additions and 58,086 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ on:
release:
types: [published, edited]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/check-datahub-jars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ on:
release:
types: [published, edited]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:

check_jars:
Expand Down
42 changes: 42 additions & 0 deletions .github/workflows/docker-ingestion-base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Builds the datahub-ingestion base image for the platforms listed in the
# final step and pushes it to Docker Hub.
name: ingestion base

# Triggers: published/edited releases, pushes to master that touch the
# ingestion Docker context, and manual dispatch.
on:
  release:
    types:
      - published
      - edited
  push:
    branches:
      - master
    paths:
      - 'docker/datahub-ingestion/**'
  workflow_dispatch:

# Runs are grouped per workflow and ref; a newer run in the same group cancels
# the one still in progress. This workflow declares no pull_request trigger,
# so the group key resolves through the github.ref side of the expression.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:

  build-base:
    name: Build and Push Docker Image to Docker Hub
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Credentials come from repository secrets; nothing is hard-coded here.
      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

      # Builds base.Dockerfile for amd64 and arm64 and pushes it as :latest.
      - name: Build and Push image
        uses: docker/build-push-action@v2
        with:
          context: ./docker/datahub-ingestion
          file: ./docker/datahub-ingestion/base.Dockerfile
          platforms: linux/amd64,linux/arm64
          tags: acryldata/datahub-ingestion-base:latest
          push: true
42 changes: 42 additions & 0 deletions .github/workflows/docker-ingestion-smoke.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Builds the datahub-ingestion smoke image for the platforms listed in the
# final step and pushes it to Docker Hub.
name: ingestion smoke

# Triggers: published/edited releases, pushes to master that touch the
# ingestion Docker context, and manual dispatch.
on:
  release:
    types:
      - published
      - edited
  push:
    branches:
      - master
    paths:
      - 'docker/datahub-ingestion/**'
  workflow_dispatch:

# Runs are grouped per workflow and ref; a newer run in the same group cancels
# the one still in progress. This workflow declares no pull_request trigger,
# so the group key resolves through the github.ref side of the expression.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:

  build-smoke:
    name: Build and Push Docker Image to Docker Hub
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Credentials come from repository secrets; nothing is hard-coded here.
      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}

      # Builds smoke.Dockerfile for amd64 and arm64.
      # NOTE(review): the image is pushed to the datahub-ingestion-base
      # repository under the `smoke` tag - confirm that is the intended target.
      - name: Build and Push image
        uses: docker/build-push-action@v2
        with:
          context: ./docker/datahub-ingestion
          file: ./docker/datahub-ingestion/smoke.Dockerfile
          platforms: linux/amd64,linux/arm64
          tags: acryldata/datahub-ingestion-base:smoke
          push: true
4 changes: 4 additions & 0 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ on:
release:
types: [published, edited]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

env:
DATAHUB_GMS_IMAGE: 'linkedin/datahub-gms'
DATAHUB_FRONTEND_IMAGE: 'linkedin/datahub-frontend-react'
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ on:
# release:
# types: [published, edited]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
gh-pages:
runs-on: ubuntu-latest
Expand Down
51 changes: 0 additions & 51 deletions .github/workflows/metadata-ingestion-slow.yml

This file was deleted.

25 changes: 13 additions & 12 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ on:
release:
types: [published, edited]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:

metadata-ingestion-general:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
python-version: ["3.6", "3.9"]
Expand All @@ -29,30 +34,28 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- uses: vemonet/setup-spark@v1 # spark is required for pyspark+pydeequ data lake profiling
with:
spark-version: '3.0.3'
hadoop-version: '3.2'
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:build :metadata-ingestion:check
run: ./gradlew :metadata-ingestion:build :metadata-ingestion:testQuick :metadata-ingestion:check
- uses: actions/upload-artifact@v2
if: always()
with:
name: Test Results (metadata ingestion general)
name: Test Results (metadata ingestion ${{ matrix.python-version }} testQuick)
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
metadata-ingestion-by-version:
metadata-ingestion:
runs-on: ubuntu-latest
env:
SPARK_VERSION: 3.0.3
DATAHUB_TELEMETRY_ENABLED: false
strategy:
matrix:
python-version: ["3.6", "3.9"]
command: ["installAirflow1", "testIntegration", "testIntegrationBatch1", "testSlowIntegration"]
fail-fast: false
steps:
- uses: actions/checkout@v2
Expand All @@ -64,11 +67,9 @@ jobs:
spark-version: '3.0.3'
hadoop-version: '3.2'
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh && python -m pip install --upgrade pip && pip install tox tox-gh-actions
- name: Codegen
run: ./gradlew :metadata-ingestion:codegen
- name: Run tox tests
run: cd metadata-ingestion && tox
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:build :metadata-ingestion:${{ matrix.command }} -x:metadata-ingestion:testQuick -x:metadata-ingestion:check
- uses: actions/upload-artifact@v2
if: always()
with:
Expand Down
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ project.ext.externalDependency = [
'junitJupiterEngine': "org.junit.jupiter:junit-jupiter-engine:$junitJupiterVersion",
// avro-serde includes dependencies for `kafka-avro-serializer` `kafka-schema-registry-client` and `avro`
'kafkaAvroSerde': 'io.confluent:kafka-streams-avro-serde:5.5.1',
'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4',
'kafkaClients': 'org.apache.kafka:kafka-clients:2.3.0',
'logbackClassic': 'ch.qos.logback:logback-classic:1.2.9',
'lombok': 'org.projectlombok:lombok:1.18.12',
Expand All @@ -115,6 +116,7 @@ project.ext.externalDependency = [
'parquet': 'org.apache.parquet:parquet-avro:1.12.2',
'picocli': 'info.picocli:picocli:4.5.0',
'playCache': 'com.typesafe.play:play-cache_2.12:2.7.6',
'playEhcache': 'com.typesafe.play:play-ehcache_2.12:2.7.6',
'playWs': 'com.typesafe.play:play-ahc-ws-standalone_2.12:2.0.8',
'playDocs': 'com.typesafe.play:play-docs_2.12:2.7.6',
'playGuice': 'com.typesafe.play:play-guice_2.12:2.7.6',
Expand Down
2 changes: 1 addition & 1 deletion datahub-frontend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ WHZ-Authentication {

### Authentication in React
The React app supports both JAAS as described above and separately OIDC authentication. To learn about configuring OIDC for React,
see the [OIDC in React](../docs/how/auth/sso/configure-oidc-react.md) document.
see the [OIDC in React](../docs/authentication/guides/sso/configure-oidc-react.md) document.


### API Debugging
Expand Down
50 changes: 35 additions & 15 deletions datahub-frontend/app/auth/AuthModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.pac4j.core.context.session.SessionStore;
import org.pac4j.play.LogoutController;
import org.pac4j.play.http.PlayHttpActionAdapter;
import org.pac4j.play.store.PlayCacheSessionStore;
import org.pac4j.play.store.PlayCookieSessionStore;
import org.pac4j.play.store.PlaySessionStore;
import org.pac4j.play.store.ShiroAesDataEncrypter;
Expand All @@ -32,6 +33,7 @@
import auth.sso.SsoConfigs;
import auth.sso.SsoManager;
import controllers.SsoCallbackController;
import play.cache.SyncCacheApi;
import utils.ConfigUtil;

import static auth.AuthUtils.*;
Expand All @@ -51,6 +53,8 @@ public class AuthModule extends AbstractModule {
* We hash this value (SHA1), then take the first 16 bytes as the AES key.
*/
private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key";
private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider";

private final com.typesafe.config.Config _configs;

public AuthModule(final Environment environment, final com.typesafe.config.Config configs) {
Expand All @@ -59,22 +63,38 @@ public AuthModule(final Environment environment, final com.typesafe.config.Confi

@Override
protected void configure() {
PlayCookieSessionStore playCacheCookieStore;
try {
// To generate a valid encryption key from an input value, we first
// hash the input to generate a fixed-length string. Then, we convert
// it to hex and slice the first 16 bytes, because AES key length must strictly
// have a specific length.
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF);
final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesEncryptionKey = aesKeyHash.substring(0, 16);
playCacheCookieStore = new PlayCookieSessionStore(
new ShiroAesDataEncrypter(aesEncryptionKey));
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e);
/**
* In Pac4J, you are given the option to store the profiles of authenticated users in either
* (i) PlayCacheSessionStore - saves your data in the Play cache or
* (ii) PlayCookieSessionStore - saves your data in the Play session cookie
However, there is a problem (https://github.com/datahub-project/datahub/issues/4448) observed when storing the Pac4j profile in the cookie:
whenever the profile returned by Pac4j is greater than 4096 characters, the response will be rejected by the browser.
Default to PlayCookieSessionStore so that the datahub-frontend container remains a stateless service.
*/
String sessionStoreProvider = _configs.getString(PAC4J_SESSIONSTORE_PROVIDER_CONF);

if (sessionStoreProvider.equals("PlayCacheSessionStore")) {
final PlayCacheSessionStore playCacheSessionStore = new PlayCacheSessionStore(getProvider(SyncCacheApi.class));
bind(SessionStore.class).toInstance(playCacheSessionStore);
bind(PlaySessionStore.class).toInstance(playCacheSessionStore);
} else {
PlayCookieSessionStore playCacheCookieStore;
try {
// To generate a valid encryption key from an input value, we first
// hash the input to generate a fixed-length string. Then, we convert
// it to hex and slice the first 16 bytes, because AES key length must strictly
// have a specific length.
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF);
final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesEncryptionKey = aesKeyHash.substring(0, 16);
playCacheCookieStore = new PlayCookieSessionStore(
new ShiroAesDataEncrypter(aesEncryptionKey));
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate Pac4j cookie session store!", e);
}
bind(SessionStore.class).toInstance(playCacheCookieStore);
bind(PlaySessionStore.class).toInstance(playCacheCookieStore);
}
bind(SessionStore.class).toInstance(playCacheCookieStore);
bind(PlaySessionStore.class).toInstance(playCacheCookieStore);

try {
bind(SsoCallbackController.class).toConstructor(SsoCallbackController.class.getConstructor(
Expand Down
4 changes: 1 addition & 3 deletions datahub-frontend/app/auth/JAASConfigs.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ public class JAASConfigs {
private Boolean _isEnabled = true;

public JAASConfigs(final com.typesafe.config.Config configs) {
if (configs.hasPath(JAAS_ENABLED_CONFIG_PATH)
&& Boolean.FALSE.equals(
Boolean.parseBoolean(configs.getValue(JAAS_ENABLED_CONFIG_PATH).toString()))) {
if (configs.hasPath(JAAS_ENABLED_CONFIG_PATH) && !configs.getBoolean(JAAS_ENABLED_CONFIG_PATH)) {
_isEnabled = false;
}
}
Expand Down
Loading

0 comments on commit 1ac0f0e

Please sign in to comment.