Sage-Bionetworks · jaymedina · Oct 23, 2024 · Oct 23, 2024 · Oct 23, 2024 · Oct 23, 2024
@@ -0,0 +1,167 @@
+---
+# Clones the configured warehouse database for a labeled pull request, runs
+# schemachange against the clone, and drops the clone when the PR is closed.
+name: Test Changes with Cloned DB
+
+on:
+    # Every job in this workflow is gated on fields of the pull_request event
+    # payload, so a `push` trigger could only produce runs in which all jobs
+    # are skipped. Only pull_request events are subscribed to.
+    pull_request:
+        types: [labeled, synchronize, closed]
+
+# The jobs only read the repository contents.
+permissions:
+    contents: read
+
+jobs:
+
+    create_clone_and_run_schemachange:
+        # Runs for open PRs that carry the `create_clone_and_run_schemachange`
+        # label: clones the source database under a branch-specific name and
+        # runs schemachange from the `synapse_data_warehouse` folder.
+        runs-on: ubuntu-latest
+        if: contains(github.event.pull_request.labels.*.name, 'create_clone_and_run_schemachange') && github.event.pull_request.state == 'open'
+        environment: dev
+        env:
+            SNOWFLAKE_PASSWORD: ${{ secrets.SNOWSQL_PWD }}
+            SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWSQL_ACCOUNT }}
+            SNOWFLAKE_USER: ${{ secrets.SNOWSQL_USER }}
+            SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWSQL_WAREHOUSE }}
+            # Role of the default ("dpe") connection; also the role passed to
+            # schemachange via `-r` in the last step.
+            SNOWFLAKE_CLONE_ROLE: DATA_ENGINEER
+            # Role of the "sysadmin" connection, used only for the GRANT step.
+            # Despite the name, schemachange itself does not run with this role.
+            SNOWFLAKE_SCHEMACHANGE_ROLE: SYSADMIN
+            # Name of the database that gets cloned.
+            SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE_ORIG: ${{ vars.SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE }}
+            # Not referenced by any step in this job; presumably read by the
+            # schemachange configuration -- TODO confirm.
+            SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_STORAGE_INTEGRATION }}
+            SNOWFLAKE_SYNAPSE_STAGE_URL: ${{ vars.SNOWFLAKE_SYNAPSE_STAGE_URL }}
+            # Raw clone name: "<source database>_<PR head branch>". It is
+            # sanitized into a valid identifier in the "Sanitize Clone Name" step.
+            CLONE_NAME: "${{ vars.SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE }}_${{ github.head_ref }}"
+            STACK: ${{ vars.STACK }}
+
+        steps:
+
+            - uses: actions/checkout@v4
+            - uses: actions/setup-python@v4
+              with:
+                python-version: '3.10'
+
+            - name: Install python libraries
+              shell: bash
+              run: |
+                  pip install schemachange==3.6.1
+                  pip install numpy==1.26.4
+                  pip install pandas==1.5.3
+
+            - name: Configure Snowflake connections
+              run: |
+                # Single temporary TOML file that holds both connections.
+                config_file_dpe=$(mktemp)
+                {
+                  # Default connection: uses SNOWFLAKE_CLONE_ROLE
+                  echo 'default_connection_name = "dpe"'
+                  echo '[connections.dpe]'
+                  echo "account = \"${SNOWFLAKE_ACCOUNT}\""
+                  echo "user = \"${SNOWFLAKE_USER}\""
+                  echo "role = \"${SNOWFLAKE_CLONE_ROLE}\""
+                  echo "password = \"${SNOWFLAKE_PASSWORD}\""
+                  echo "warehouse = \"${SNOWFLAKE_WAREHOUSE}\""
+                  echo 'authenticator = "SNOWFLAKE"'
+
+                  # Second connection: same user, SNOWFLAKE_SCHEMACHANGE_ROLE
+                  echo '[connections.sysadmin]'
+                  echo "account = \"${SNOWFLAKE_ACCOUNT}\""
+                  echo "user = \"${SNOWFLAKE_USER}\""
+                  echo "role = \"${SNOWFLAKE_SCHEMACHANGE_ROLE}\""
+                  echo "password = \"${SNOWFLAKE_PASSWORD}\""
+                  echo "warehouse = \"${SNOWFLAKE_WAREHOUSE}\""
+                  echo 'authenticator = "SNOWFLAKE"'
+                } >> "$config_file_dpe"
+
+                # Expose the config path to the following steps
+                echo "SNOWFLAKE_CONFIG_PATH_DPE=$config_file_dpe" >> "$GITHUB_ENV"
+
+            - name: Install Snowflake CLI with DPE_ENGINEER config
+              # NOTE(review): the action reference was unreadable in the source
+              # and has been restored; confirm the pinned tag.
+              uses: Snowflake-Labs/snowflake-cli-action@v1.5
+              with:
+                default-config-file-path: ${{ env.SNOWFLAKE_CONFIG_PATH_DPE }}
+
+            - name: Verify Snowflake CLI installation and connections
+              run: |
+                snow --version
+                snow connection test -c sysadmin
+                snow connection test -c dpe
+
+            - name: Sanitize Clone Name
+              run: |
+                # Replace every character outside [a-zA-Z0-9_] with "_" so the
+                # name can be used as an unquoted database identifier.
+                CLONE_NAME_SANITIZED="${CLONE_NAME//[^a-zA-Z0-9_]/_}"
+                echo "Clone name has been updated! The clone name will be: ${CLONE_NAME_SANITIZED}"
+                # Available to later steps only, not to this one.
+                echo "SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE=${CLONE_NAME_SANITIZED}" >> "$GITHUB_ENV"
+
+            - name: Zero-copy clone the database
+              shell: bash
+              run: |
+                # Runs on the default connection; an existing clone with the
+                # same name is replaced.
+                snow sql -q "CREATE OR REPLACE DATABASE $SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE CLONE $SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE_ORIG;"
+
+            - name: Grant permissions to DPE_ENGINEER on cloned database
+              shell: bash
+              run: |
+                # NOTE(review): the grants target role DPE_ENGINEER, while the
+                # clone and schemachange steps use SNOWFLAKE_CLONE_ROLE
+                # (DATA_ENGINEER) -- confirm this is the intended role.
+                snow connection set-default sysadmin
+                snow sql -q "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA ${SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE}.SYNAPSE TO ROLE DPE_ENGINEER;"
+                snow sql -q "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA ${SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE}.SYNAPSE_RAW TO ROLE DPE_ENGINEER;"
+
+            - name: Run schemachange on the clone as DPE_ENGINEER
+              shell: bash
+              run: |
+                # Arguments are quoted so an empty value cannot shift the
+                # following flag into its place.
+                schemachange \
+                -f synapse_data_warehouse \
+                -a "$SNOWFLAKE_ACCOUNT" \
+                -u "$SNOWFLAKE_USER" \
+                -r "$SNOWFLAKE_CLONE_ROLE" \
+                -w "$SNOWFLAKE_WAREHOUSE" \
+                --config-folder synapse_data_warehouse
+
+    drop_clone:
+      # Drops the branch-specific clone once the pull request has been closed
+      # or merged. DROP ... IF EXISTS makes this a no-op when no clone exists.
+      runs-on: ubuntu-latest
+      if: github.event.pull_request.merged == true || github.event.action == 'closed'
+      environment: dev
+      env:
+          SNOWFLAKE_PASSWORD: ${{ secrets.SNOWSQL_PWD }}
+          SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWSQL_ACCOUNT }}
+          SNOWFLAKE_USER: ${{ secrets.SNOWSQL_USER }}
+          SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWSQL_WAREHOUSE }}
+          SNOWFLAKE_CLONE_ROLE: DATA_ENGINEER
+          # Raw clone name: "<source database>_<PR head branch>"; sanitized in
+          # the "Sanitize Clone Name" step before use.
+          CLONE_NAME: "${{ vars.SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE }}_${{ github.head_ref }}"
+
+      steps:
+
+          - uses: actions/checkout@v4
+          - uses: actions/setup-python@v4
+            with:
+              python-version: '3.10'
+
+          - name: Configure Snowflake connection
+            run: |
+              # Create a temporary file for config.toml
+              config_file=$(mktemp)
+
+              # Write the single "dpe" connection to config.toml
+              {
+                echo 'default_connection_name = "dpe"'
+                echo '[connections.dpe]'
+                echo "account = \"${SNOWFLAKE_ACCOUNT}\""
+                echo "user = \"${SNOWFLAKE_USER}\""
+                echo "role = \"${SNOWFLAKE_CLONE_ROLE}\""
+                echo "password = \"${SNOWFLAKE_PASSWORD}\""
+                echo "warehouse = \"${SNOWFLAKE_WAREHOUSE}\""
+                echo 'authenticator = "SNOWFLAKE"'
+              } >> "$config_file"
+
+              # Expose the config.toml path to the following steps
+              echo "SNOWFLAKE_CONFIG_PATH=$config_file" >> "$GITHUB_ENV"
+
+          - name: Install Snowflake CLI
+            # NOTE(review): the action reference was unreadable in the source
+            # and has been restored; confirm the pinned tag.
+            uses: Snowflake-Labs/snowflake-cli-action@v1.5
+            with:
+              default-config-file-path: ${{ env.SNOWFLAKE_CONFIG_PATH }}
+
+          - name: Verify Snowflake CLI installation and connection
+            run: |
+              snow --version
+              snow connection test
+
+          - name: Sanitize Clone Name
+            run: |
+              # Replace every character outside [a-zA-Z0-9_] with "_".
+              CLONE_NAME_SANITIZED="${CLONE_NAME//[^a-zA-Z0-9_]/_}"
+              # Values appended to $GITHUB_ENV only become visible in later
+              # steps, so the local variable is what gets logged here.
+              echo "Clone name has been updated! The clone name will be: ${CLONE_NAME_SANITIZED}"
+              echo "SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE=${CLONE_NAME_SANITIZED}" >> "$GITHUB_ENV"
+
+          - name: Drop the clone
+            shell: bash
+            run: |
+              snow sql -r "$SNOWFLAKE_CLONE_ROLE" -q "DROP DATABASE IF EXISTS $SNOWFLAKE_SYNAPSE_DATA_WAREHOUSE_DATABASE;"
@@ -0,0 +1,109 @@
+# Contributing Guidelines
+
+Welcome, and thanks for your interest in contributing to the `snowflake` repository! :snowflake:
+
+By contributing, you are agreeing that we may redistribute your work under this [license](https://github.com/Sage-Bionetworks/snowflake/tree/snow-90-auto-db-clone?tab=License-1-ov-file#).
+
+## Getting Started
+
+To start contributing, follow these steps to set up and develop on your local repository:
+
+### 1. Clone the Repository
+
+```bash
+git clone https://github.com/Sage-Bionetworks/snowflake
+```
+
+### 2. Fetch the Latest `dev` Branch
+
+After cloning, navigate to the repository directory:
+
+```bash
+cd snowflake
+```
+
+Then, fetch the latest updates from the `dev` branch to ensure you’re working with the latest codebase:
+
+```bash
+git fetch origin dev
+```
+
+### 3. Create a New Branch Off `dev`
+
+Create and checkout your feature branch from the latest `dev` branch. Name it based on the Jira ticket number and your feature/fix. For example:
+
+```bash
+git checkout -b snow-123-new-feature origin/dev
+```
+
+Your branch will now be tracking `origin/dev` which you can merge with or rebase onto should a merge conflict occur. For more guidance
+on how to resolve merge conflicts, [see here](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/about-merge-conflicts#resolving-merge-conflicts).
+
+> [!IMPORTANT]
+> If you plan to run the automated testing described in section [Running CI Jobs for Database Testing](#running-ci-jobs-for-database-testing), your branch name needs to start with `snow-`,
+> otherwise the test deployment will fail.
+
+### 4. Push to The Remote Branch
+
+Once you've made your changes and committed them locally, push your branch to the remote repository:
+
+```bash
+git push origin snow-123-new-feature
+```
+
+### 5. Create a Draft Pull Request
+
+In order to initiate automated testing you will need to work on a draft pull request (PR) on GitHub. After pushing your commits to
+the remote branch in Step 4, use the GitHub UI to initiate a PR and convert it to draft mode.
+
+After testing your changes against `schemachange` using the instructions in [Running CI Jobs for Database Testing](#running-ci-jobs-for-database-testing),
+you can then take your PR out of draft mode by marking it as Ready for Review in the GitHub UI.
+
+## Running CI Jobs for Database Testing
+
+This repository includes automated CI jobs to validate changes against a cloned database. If you want to trigger these jobs to test your changes in an isolated database environment, please follow the steps below:
+
+### 1. Add the Label
+
+Add the label `create_clone_and_run_schemachange` to your PR to trigger the CI workflow. This job does two things:
+
+* Creates a zero-copy clone of the database and runs your proposed schema changes against it.
+* Tests your schema changes on a cloned version of the development database, verifying that your updates work correctly without
+  affecting the real development database. After the PR is closed (whether or not it was merged), the clone is automatically dropped to free up resources.
+
+> [!IMPORTANT]
+> Your cloned database is a clone of the development database as it exists at the time of cloning. Please be mindful that
+> **there may have been changes made to the development database since your last clone**.
+
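+> [!NOTE]
+> As you are developing on your branch, you may want to re-run the `schemachange` test on your updates.
+> While the PR is open and carries the `create_clone_and_run_schemachange` label, every new push to the branch re-triggers the job
+> (the clone is re-created each time). You can also unlabel and relabel the PR to re-trigger the job manually.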
+
+### 2. Perform Inspection using Snowsight
+
+You can go on Snowsight to perform manual inspection of the schema changes in your cloned database. We recommend using a SQL worksheet for manual quality assurance queries, e.g. ensure there is no row duplication in the new/updated tables.
+
+> [!TIP]
+> Your database will be named after your feature branch so it's easy to find on Snowsight. For example, if your feature branch is called
+> `snow-123-new-feature`, your database might be called `SYNAPSE_DATA_WAREHOUSE_DEV_SNOW_123_NEW_FEATURE`.
+
+### 3. Manually Drop the Cloned Database (Optional)
+
+There is a second job in the repository (`drop_clone`) that will drop your branch's database clone once your PR is closed, whether or not it was merged into `dev`.
+In other words, once your cloned database is created for testing, it will remain in place until your PR is closed (unless you manually drop it).
+
+An initial clone of the development database will not incur new resource costs, **HOWEVER**, when a clone deviates from the original
+(e.g. new schema changes are applied for testing), the cloned database will begin to incur costs the longer it exists in our warehouse.
+**Please be mindful of the amount of time your PR stays open**, as cloned databases do not get dropped until a PR is closed or merged. For example, if your PR is open for >1 week, consider manually dropping your cloned database on Snowflake to avoid unnecessary cost.
+
+> [!NOTE]
+> Keep in mind that after dropping your cloned database, you will still have access to it through Snowflake's "Time Travel"
+> feature. Your database is retained through "Time Travel" for a configured retention period before it is permanently deleted. To see
+> how long your database can be accessed after dropping, run the following query in a SQL worksheet on Snowsight and look
+> for the keyword `DATA_RETENTION_TIME_IN_DAYS`:
+> 
+> ```
+> SHOW PARAMETERS IN DATABASE <your-database-name>;
+> ```
+
+Following these guidelines helps maintain a clean, efficient, and well-tested codebase. Thank you for contributing!
@@ -54,7 +54,7 @@ pip install "snowflake-connector-python[pandas]" "synapseclient[pandas]" python-
 
 ## Contributing
 
-WIP
+For contribution guidelines, please see the `CONTRIBUTING.md` file in this repository.
 
 ## Visualizing with Streamlit
 

@@ -0,0 +1,17 @@
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+
+-- Dummy table with example columns; created only if it does not already exist
+CREATE TABLE IF NOT EXISTS my_table2 (
+    id INT,
+    name STRING,
+    created_at TIMESTAMP,
+    value FLOAT
+);
+
+-- Add five arbitrary sample rows to the dummy table
+INSERT INTO my_table2
+    (id, name, created_at, value)
+VALUES
+    (1, 'Alpha', CURRENT_TIMESTAMP, 1000.5),
+    (2, 'Beta', CURRENT_TIMESTAMP, 20.0),
+    (3, 'Gamma', CURRENT_TIMESTAMP, 30.75),
+    (4, 'Delta', CURRENT_TIMESTAMP, 40.1),
+    (5, 'Epsilon', CURRENT_TIMESTAMP, 50.9);