
Release 1.4.0 #2500

Merged (53 commits, Mar 9, 2023)

Commits
e8a8f89
[pre-commit.ci] pre-commit autoupdate (#2299)
pre-commit-ci[bot] Feb 7, 2023
a09c74d
Add deploy to readme (#2307)
dvsrepo Feb 7, 2023
2bc97c6
Update README.md (#2308)
dvsrepo Feb 7, 2023
d354274
chore: Upgrade package version
Feb 9, 2023
ff4e79f
chore: Merge branch 'main' into develop
Feb 9, 2023
5f0627c
ci: Replace `isort` by `ruff` in `pre-commit` (#2325)
tomaarsen Feb 14, 2023
91a77ad
Docs: Update readme with quickstart section and new links to guides (…
dvsrepo Feb 14, 2023
6d2885f
Enhancement: Also validate records on assignment of variables (#2337)
tomaarsen Feb 15, 2023
179ffb9
Enhancement: Distinguish between error message and context in validat…
tomaarsen Feb 15, 2023
c83ec9e
ci: Setup black line-length in toml file (#2352)
frascuchon Feb 16, 2023
4c5f513
Use `rich` for logging, tracebacks, printing, progressbars (#2350)
tomaarsen Feb 16, 2023
5a8bb28
chore: Replace old recognai emails with argilla ones (#2365)
jfcalvo Feb 20, 2023
175a52a
refactor: remove the classification labeling rules service (#2361)
frascuchon Feb 20, 2023
3c27fd5
[pre-commit.ci] pre-commit autoupdate
pre-commit-ci[bot] Feb 21, 2023
8f0d10d
Documentation update: adding missing n (#2362)
Gnonpi Feb 21, 2023
8f90000
chore: Merge branch 'main' into develop
frascuchon Feb 22, 2023
5ab38be
ci: Remove Pyre from CI (#2358)
tomaarsen Feb 22, 2023
e999fe2
Refactor/deprecate dataset owner (#2386)
frascuchon Feb 22, 2023
4e623d4
feat: Add `active_client` function to main argilla module (#2387)
frascuchon Feb 22, 2023
4e18c6b
Refactor/remove no workspace usage and better superuser computation (…
frascuchon Feb 22, 2023
317ce42
ci: remove checkpoint from PR template (#2390)
keithCuniah Feb 22, 2023
ed55719
[pre-commit.ci] pre-commit autoupdate (#2376)
tomaarsen Feb 22, 2023
c35d63a
CI: Skip rather than failing in 2 common scenarios (#2392)
tomaarsen Feb 24, 2023
68eddcb
Refactor: Replace "ar" with "rg" in test imports (#2393)
tomaarsen Feb 27, 2023
c27672f
Refactor: Add `require_version` function and `requires_version` decor…
tomaarsen Feb 27, 2023
fd72834
[pre-commit.ci] pre-commit autoupdate
pre-commit-ci[bot] Feb 28, 2023
b127ff0
[pre-commit.ci] pre-commit autoupdate (#2431)
tomaarsen Feb 28, 2023
4a92b35
feat: Extend shortcuts to include alphabet for token classification (…
cceyda Feb 28, 2023
f5834a5
refactor: Improve efficiency of `.scan` (and `.load`) if `limit` is …
tomaarsen Mar 1, 2023
d789fa1
fix: added regex match to set workspace method (#2427)
davidberenstein1957 Mar 2, 2023
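The fix in #2427 adds a regex match to the set-workspace method. The actual pattern is not visible in this diff, so the sketch below is an assumption about what such validation might look like:

```python
import re

# Assumed pattern — the regex Argilla actually uses in #2427 is not shown here.
WORKSPACE_PATTERN = re.compile(r"^[a-zA-Z0-9_\-]+$")


def set_workspace(name: str) -> str:
    """Return the workspace name if it matches the allowed pattern."""
    if not WORKSPACE_PATTERN.fullmatch(name):
        raise ValueError(f"Invalid workspace name: {name!r}")
    return name
```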
fc71c3b
fix: error when loading record with empty string query (#2429)
davidberenstein1957 Mar 2, 2023
6649e5f
Refactor/prepare datasets endpoints (#2403)
frascuchon Mar 2, 2023
a456f58
chore: Merge branch 'main' into develop
frascuchon Mar 2, 2023
ca06deb
Merge branch 'develop' of github.com:recognai/rubrix into develop
frascuchon Mar 2, 2023
40ca933
refactor: Make workspace required in requests (#2471)
frascuchon Mar 3, 2023
b3b897a
feat: Allow passing workspace as client param for `rg.log` or `rg.loa…
davidberenstein1957 Mar 6, 2023
3ebea76
feat: Deprecate `chunk_size` in favor of `batch_size` for `rg.log` (#…
tomaarsen Mar 6, 2023
e25be3e
feat: Expose `batch_size` parameter for `rg.load` (#2460)
tomaarsen Mar 6, 2023
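The two commits above deprecate `chunk_size` in favor of `batch_size` for `rg.log` and expose `batch_size` on `rg.load` (#2460). As a hedged illustration of the deprecation-shim pattern involved (not Argilla's actual implementation), the old argument can keep working while warning callers:

```python
import warnings


def log(records, name, batch_size=500, chunk_size=None):
    """Sketch of a deprecation shim: keep accepting the old `chunk_size`
    argument, but warn and funnel its value into `batch_size`.
    Returns the number of batches that would be sent."""
    if chunk_size is not None:
        warnings.warn(
            "`chunk_size` is deprecated and will be removed in a future "
            "release; please use `batch_size` instead.",
            DeprecationWarning,
        )
        batch_size = chunk_size
    # Slice the records into batches of at most `batch_size` items.
    batches = [records[i : i + batch_size] for i in range(0, len(records), batch_size)]
    return len(batches)
```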
7199780
docs: Add AutoTrain to readme
dvsrepo Mar 6, 2023
5600301
fix: added flexible app redirect to docs page (#2428)
davidberenstein1957 Mar 6, 2023
21efb83
feat: Add text2text support for prepare for training spark nlp (#2466)
davidberenstein1957 Mar 6, 2023
58aa9f9
[pre-commit.ci] pre-commit autoupdate (#2490)
pre-commit-ci[bot] Mar 7, 2023
4aecb13
Feat/python api sort support (#2487)
davidberenstein1957 Mar 7, 2023
3fce915
feat: Bulk annotation improvement (#2437)
leiyre Mar 8, 2023
7b89393
chore: set release version
frascuchon Mar 8, 2023
93fa938
chore: Update dev version
frascuchon Mar 8, 2023
b49fa2b
chore: Merge branch 'main' into develop
frascuchon Mar 8, 2023
c9a30fb
chore: Merge branch 'develop' into releases/1.4.0
frascuchon Mar 8, 2023
8551dc5
refactor: send username when no workspace setup (#2499)
frascuchon Mar 8, 2023
d2ed1fc
chore: Review active client doc references (#2501)
frascuchon Mar 8, 2023
29c9ee3
feat: `configure_dataset` accepts a workspace as argument (#2503)
frascuchon Mar 8, 2023
a97b7d0
Docs for bulk annotation (#2506)
nataliaElv Mar 8, 2023
3a91bbc
chore: Set workspace to None as default for dataset configuration
frascuchon Mar 9, 2023
24 changes: 20 additions & 4 deletions .github/workflows/package.yml
@@ -208,6 +208,11 @@ jobs:
defaults:
run:
shell: bash -l {0}
# Only build the package if we can deploy it as a docker image
env:
IS_DEPLOYABLE: ${{ secrets.AR_DOCKER_USERNAME != '' }}
outputs:
code_changes: ${{ steps.filter.outputs.code_changes }}

steps:
- name: Checkout Code 🛎
@@ -228,7 +233,7 @@
- '.github/workflows/package.yml'
- name: Cache pip 👜
uses: actions/cache@v2
if: steps.filter.outputs.code_changes == 'true'
if: steps.filter.outputs.code_changes == 'true' && env.IS_DEPLOYABLE == 'true'
env:
# Increase this value to reset cache if pyproject.toml has not changed
CACHE_NUMBER: 0
@@ -238,18 +243,18 @@

- name: Setup Node.js
uses: actions/setup-node@v2
if: steps.filter.outputs.code_changes == 'true'
if: steps.filter.outputs.code_changes == 'true' && env.IS_DEPLOYABLE == 'true'
with:
node-version: "14"

- name: Build Package 🍟
if: steps.filter.outputs.code_changes == 'true'
if: steps.filter.outputs.code_changes == 'true' && env.IS_DEPLOYABLE == 'true'
run: |
pip install -U build
scripts/build_distribution.sh

- name: Upload package artifact
if: steps.filter.outputs.code_changes == 'true'
if: steps.filter.outputs.code_changes == 'true' && env.IS_DEPLOYABLE == 'true'
uses: actions/upload-artifact@v2
with:
name: python-package
@@ -262,6 +267,9 @@ jobs:
- build
- test-elastic
- test-opensearch
env:
IS_DEPLOYABLE: ${{ secrets.AR_DOCKER_USERNAME != '' }}
if: needs.build.outputs.code_changes == 'true'
strategy:
matrix:
include:
@@ -281,30 +289,36 @@
steps:
- name: Checkout Code 🛎
uses: actions/checkout@v2
if: env.IS_DEPLOYABLE == 'true'

- name: Download python package
uses: actions/download-artifact@v2
with:
name: python-package
path: dist
if: env.IS_DEPLOYABLE == 'true'

- name: Set up QEMU
uses: docker/setup-qemu-action@v2
if: env.IS_DEPLOYABLE == 'true'

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
if: env.IS_DEPLOYABLE == 'true'

- name: Docker meta
id: meta
uses: crazy-max/ghaction-docker-meta@v2
with:
images: ${{ matrix.image }}
if: env.IS_DEPLOYABLE == 'true'

- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.AR_DOCKER_USERNAME }}
password: ${{ secrets.AR_DOCKER_PASSWORD }}
if: env.IS_DEPLOYABLE == 'true'

- name: Build & push Docker image
uses: docker/build-push-action@v2
@@ -315,6 +329,7 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
push: true
if: env.IS_DEPLOYABLE == 'true'

- name: Docker Hub Description
uses: peter-evans/dockerhub-description@v3
@@ -323,6 +338,7 @@
password: ${{ secrets.AR_DOCKER_PASSWORD }}
repository: ${{ matrix.image }}
readme-filepath: ${{ matrix.readme }}
if: env.IS_DEPLOYABLE == 'true'

# This job will upload a Python Package using Twine when a release is created
# For more information see:
78 changes: 0 additions & 78 deletions .github/workflows/pyre-check.yml

This file was deleted.

12 changes: 8 additions & 4 deletions .pre-commit-config.yaml
@@ -20,15 +20,19 @@ repos:
# - --remove-header

- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
additional_dependencies: ["click==8.0.4"]

- repo: https://github.com/pycqa/isort
rev: 5.11.5
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.254
hooks:
- id: isort
# Simulate isort via (the much faster) ruff
- id: ruff
args:
- --select=I
- --fix

- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
rev: v9.4.0
33 changes: 0 additions & 33 deletions .pyre_configuration

This file was deleted.

2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
@@ -60,7 +60,7 @@ representative at an online or offline event.

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
contact@recogn.ai.
contact@argilla.io.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
101 changes: 25 additions & 76 deletions README.md
@@ -16,19 +16,27 @@
<a href="https://pepy.tech/project/argilla">
<img alt="CI" src="https://static.pepy.tech/personalized-badge/argilla?period=month&units=international_system&left_color=grey&right_color=blue&left_text=pypi%20downloads/month">
</a>
<a href="https://huggingface.co/new-space?template=argilla/argilla-template-space">
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/deploy-to-spaces-sm.svg" />
</a>
</p>

<h2 align="center">Open-source framework for data-centric NLP</h2>
<p align="center">Data Labeling, curation, and Inference Store</p>
<p align="center">Designed for MLOps & Feedback Loops</p>
<h2 align="center">Open-source platform for data-centric NLP</h2>
<p align="center">Data Labeling for MLOps & Feedback Loops</p>


> 🆕 🔥 Play with the Argilla UI in this [live-demo](https://argilla-live-demo.hf.space) powered by Hugging Face Spaces (login: `argilla`, password: `1234`)

> 🆕 🔥 Since `1.2.0` Argilla supports vector search for finding the most similar records to a given one. This feature uses vector or semantic search combined with more traditional search (keyword and filter based). Learn more on this [deep-dive guide](https://docs.argilla.io/en/latest/guides/features/semantic-search.html)

https://user-images.githubusercontent.com/1107111/223220683-fbfa63da-367c-4cfa-bda5-66f47413b6b0.mp4

<br />

> 🆕 🔥 Train custom transformers models with no-code: [Argilla + AutoTrain](https://www.argilla.io/blog/argilla-meets-autotrain)

> 🆕 🔥 Deploy [Argilla on Spaces](https://huggingface.co/new-space?template=argilla/argilla-template-space)

> 🆕 🔥 Since `1.2.0` Argilla supports vector search for finding the most similar records to a given one. This feature uses vector or semantic search combined with more traditional search (keyword and filter based). Learn more on this [deep-dive guide](https://docs.argilla.io/en/latest/guides/features/semantic-search.html)

![imagen](https://user-images.githubusercontent.com/1107111/204772677-facee627-9b3b-43ca-8533-bbc9b4e2d0aa.png)

<!-- https://user-images.githubusercontent.com/25269220/200496945-7efb11b8-19f3-4793-bb1d-d42132009cbb.mp4 -->

@@ -62,7 +70,7 @@

### Advanced NLP labeling

- Programmatic labeling using [weak supervision](https://docs.argilla.io/en/latest/guides/techniques/weak_supervision.html). Built-in label models (Snorkel, Flyingsquid)
- Programmatic labeling using [rules and weak supervision](https://docs.argilla.io/en/latest/guides/programmatic_labeling_with_rules.html). Built-in label models (Snorkel, Flyingsquid)
- [Bulk-labeling](https://docs.argilla.io/en/latest/reference/webapp/features.html#bulk-annotate) and [search-driven annotation](https://docs.argilla.io/en/latest/guides/features/queries.html)
- Iterate on training data with any [pre-trained model](https://docs.argilla.io/en/latest/tutorials/libraries/huggingface.html) or [library](https://docs.argilla.io/en/latest/tutorials/libraries/libraries.html)
- Efficiently review and refine annotations in the UI and with Python
@@ -72,93 +80,34 @@
### Monitoring

- Close the gap between production data and data collection activities
- [Auto-monitoring](https://docs.argilla.io/en/latest/guides/steps/3_deploying.html) for [major NLP libraries and pipelines](https://docs.argilla.io/en/latest/tutorials/libraries/libraries.html) (spaCy, Hugging Face, FlairNLP)
- [Auto-monitoring](https://docs.argilla.io/en/latest/guides/log_load_and_prepare_data.html) for [major NLP libraries and pipelines](https://docs.argilla.io/en/latest/tutorials/libraries/libraries.html) (spaCy, Hugging Face, FlairNLP)
- [ASGI middleware](https://docs.argilla.io/en/latest/tutorials/notebooks/deploying-texttokenclassification-fastapi.html) for HTTP endpoints
- Argilla Metrics to understand data and model issues, [like entity consistency for NER models](https://docs.argilla.io/en/latest/guides/steps/4_monitoring.html)
- Argilla Metrics to understand data and model issues, [like entity consistency for NER models](https://docs.argilla.io/en/latest/guides/measure_datasets_with_metrics.html)
- Integrated with Kibana for custom dashboards

### Team workspaces

- Bring different users and roles into the NLP data and model lifecycles
- Organize data collection, review and monitoring into different [workspaces](https://docs.argilla.io/en/latest/getting_started/installation/user_management.html#workspace)
- Organize data collection, review and monitoring into different [workspaces](https://docs.argilla.io/en/latest/getting_started/installation/configurations/user_management.html)
- Manage workspace access for different users

## Quickstart
Argilla is composed of a `Python Server` with Elasticsearch as the database layer, and a `Python Client` to create and manage datasets.

To get started you need to **install the client and the server** with `pip`:
```bash

pip install "argilla[server]"

```

Then you need to **run [Elasticsearch (ES)](https://www.elastic.co/elasticsearch)**.

The simplest way is to use `Docker` by running:

```bash

docker run -d --name elasticsearch-for-argilla --network argilla-net -p 9200:9200 -p 9300:9300 -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:8.5.3

```
> :information_source: **Check [the docs](https://docs.argilla.io/en/latest/getting_started/quickstart.html) for further options and configurations for Elasticsearch.**
👋 Welcome! If you have just discovered Argilla, this is the best place to get started. Argilla is composed of:

Finally you can **launch the server**:

```bash

python -m argilla

```
> :information_source: The most common error message after this step is related to the Elasticsearch instance not running. Make sure your Elasticsearch instance is running on http://localhost:9200/. If you already have an Elasticsearch instance or cluster, you can point the server to its URL by using [ENV variables](#)
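A hedged way to sanity-check that Elasticsearch is reachable before launching the server, using only the standard library (the URL below is the default from the Docker command above; this helper is not part of Argilla):

```python
import json
import urllib.request


def elasticsearch_is_up(url: str = "http://localhost:9200") -> bool:
    """Return True if `url` answers with Elasticsearch's JSON info document."""
    try:
        with urllib.request.urlopen(url, timeout=5) as response:
            info = json.load(response)
    except (OSError, ValueError):
        # Connection refused, timeout, or a non-JSON response.
        return False
    return "version" in info
```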
* Argilla Client: a powerful Python library for reading and writing data into Argilla, using all the libraries you love (transformers, spaCy, datasets, and any others).

* Argilla Server and UI: the API and UI for data annotation and curation.

🎉 You can now access Argilla UI pointing your browser at http://localhost:6900/.
To get started you need to:

**The default username and password are** `argilla` **and** `1234`.
1. Launch the Argilla Server and UI.

Your workspace will contain no datasets yet, so let's use the `datasets` library to create our first ones!

First, you need to install `datasets`:
```bash

pip install datasets

```

Then go to your Python IDE of choice and run:
```python

import pandas as pd
import argilla as rg
from datasets import load_dataset

# load dataset from the hub
dataset = load_dataset("argilla/gutenberg_spacy-ner", split="train")

# read in the dataset, assuming it's a dataset for token classification
dataset_rg = rg.read_datasets(dataset, task="TokenClassification")

# log the dataset to the Argilla web app
rg.log(dataset_rg, "gutenberg_spacy-ner")

# load dataset from json
my_dataframe = pd.read_json(
"https://raw.githubusercontent.com/recognai/datasets/main/sst-sentimentclassification.json")

# convert pandas dataframe to DatasetForTextClassification
dataset_rg = rg.DatasetForTextClassification.from_pandas(my_dataframe)

# log the dataset to the Argilla web app
rg.log(dataset_rg, name="sst-sentimentclassification")
```
2. Pick a tutorial and start rocking with Argilla using Jupyter Notebooks, or Google Colab.

This will create two datasets that you can use to do a quick tour of the core features of Argilla.
To get started follow the steps [on the Quickstart docs page](https://docs.argilla.io/en/latest/getting_started/quickstart.html).

> 🚒 **If you find issues, get direct support from the team and other community members on the [Slack Community](https://join.slack.com/t/rubrixworkspace/shared_invite/zt-whigkyjn-a3IUJLD7gDbTZ0rKlvcJ5g)**

For getting started with your own use cases, [go to the docs](https://docs.argilla.io).
## Principles
- **Open**: Argilla is free, open-source, and 100% compatible with major NLP libraries (Hugging Face transformers, spaCy, Stanford Stanza, Flair, etc.). In fact, you can **use and combine your preferred libraries** without implementing any specific interface.

Expand Down
Binary file modified docs/_source/_static/reference/webapp/features-annotate.png
Binary file removed docs/_source/_static/reference/webapp/homepage.png
(Several additional binary image files under docs/_source/_static/ were also modified; previews are not available.)