Skip to content

Commit

Permalink
Merge branch 'feature/support-for-metadata-filtering-and-sorting' int…
Browse files Browse the repository at this point in the history
…o feature/create-and-list-metadata-properties-from-python-client
  • Loading branch information
gabrielmbmb committed Sep 25, 2023
2 parents 4f3e95b + 81cda57 commit ac5c987
Show file tree
Hide file tree
Showing 7 changed files with 478 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ These are the section headers that we use:

## [Unreleased]()

### Added

- Added new endpoint `POST /api/v1/datasets/:dataset_id/metadata-properties` for dataset metadata property creation ([#3813](https://github.com/argilla-io/argilla/pull/3813))
- Added new endpoint `GET /api/v1/datasets/:dataset_id/metadata-properties` for listing dataset metadata properties ([#3813](https://github.com/argilla-io/argilla/pull/3813))

### Changed

- Updated `Dockerfile` to use multi stage build ([#3221](https://github.com/argilla-io/argilla/pull/3221) and [#3793](https://github.com/argilla-io/argilla/pull/3793)).
Expand Down
58 changes: 56 additions & 2 deletions src/argilla/server/apis/v1/handlers/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
Field,
FieldCreate,
Fields,
MetadataProperties,
MetadataProperty,
MetadataPropertyCreate,
Metrics,
Question,
QuestionCreate,
Expand All @@ -56,9 +59,19 @@


async def _get_dataset(
db: AsyncSession, dataset_id: UUID, with_fields: bool = False, with_questions: bool = False
db: AsyncSession,
dataset_id: UUID,
with_fields: bool = False,
with_questions: bool = False,
with_metadata_properties: bool = False,
) -> DatasetModel:
dataset = await datasets.get_dataset_by_id(db, dataset_id, with_fields=with_fields, with_questions=with_questions)
dataset = await datasets.get_dataset_by_id(
db,
dataset_id,
with_fields=with_fields,
with_questions=with_questions,
with_metadata_properties=with_metadata_properties,
)
if not dataset:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
Expand Down Expand Up @@ -109,6 +122,17 @@ async def list_dataset_questions(
return Questions(items=dataset.questions)


@router.get("/datasets/{dataset_id}/metadata-properties", response_model=MetadataProperties)
async def list_dataset_metadata_properties(
    *,
    db: AsyncSession = Depends(get_async_db),
    dataset_id: UUID,
    current_user: User = Security(auth.get_current_user),
):
    # Returns the metadata properties of the dataset identified by `dataset_id`.
    # NOTE: plain comments are used on purpose; a docstring here would be
    # published by FastAPI as the operation description.

    # Ask for the metadata properties to be loaded together with the dataset,
    # because they are read below when building the response.
    dataset = await _get_dataset(db, dataset_id, with_metadata_properties=True)

    # The caller needs the same permission as for reading the dataset itself.
    await authorize(current_user, DatasetPolicyV1.get(dataset))

    properties = dataset.metadata_properties
    return MetadataProperties(items=properties)


@router.get("/me/datasets/{dataset_id}/records", response_model=Records, response_model_exclude_unset=True)
async def list_current_user_dataset_records(
*,
Expand Down Expand Up @@ -276,6 +300,36 @@ async def create_dataset_question(
raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(err))


@router.post(
    "/datasets/{dataset_id}/metadata-properties",
    status_code=status.HTTP_201_CREATED,
    response_model=MetadataProperty,
)
async def create_dataset_metadata_property(
    *,
    db: AsyncSession = Depends(get_async_db),
    dataset_id: UUID,
    metadata_prop_create: MetadataPropertyCreate,
    current_user: User = Security(auth.get_current_user),
):
    # Creates a metadata property for the dataset identified by `dataset_id`.
    # Responds with 409 when the name is already taken within the dataset and
    # with 422 when the context layer rejects the request with a ValueError.
    # NOTE: plain comments are used on purpose; a docstring here would be
    # published by FastAPI as the operation description.
    dataset = await _get_dataset(db, dataset_id)

    await authorize(current_user, DatasetPolicyV1.create_metadata_property(dataset))

    # Names are checked for uniqueness per dataset before attempting the insert.
    existing_property = await datasets.get_metadata_property_by_name_and_dataset_id(
        db, metadata_prop_create.name, dataset_id
    )
    if existing_property:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Metadata property with name `{metadata_prop_create.name}` "
            f"already exists for dataset with id `{dataset_id}`",
        )

    # TODO: We should split API v1 into different FastAPI apps so we can customize error management.
    # After mapping ValueError to 422 errors for API v1 then we can remove this try except.
    try:
        created_property = await datasets.create_metadata_property(db, dataset, metadata_prop_create)
    except ValueError as err:
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(err))
    else:
        return created_property


@router.post("/datasets/{dataset_id}/records", status_code=status.HTTP_204_NO_CONTENT)
async def create_dataset_records(
*,
Expand Down
41 changes: 37 additions & 4 deletions src/argilla/server/contexts/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
from sqlalchemy.orm import contains_eager, joinedload, selectinload

from argilla.server.contexts import accounts
from argilla.server.enums import RecordInclude, ResponseStatusFilter
from argilla.server.enums import DatasetStatus, RecordInclude, ResponseStatusFilter
from argilla.server.models import (
Dataset,
DatasetStatus,
Field,
MetadataProperty,
Question,
Record,
Response,
Expand All @@ -33,7 +33,13 @@
Suggestion,
)
from argilla.server.models.suggestions import SuggestionCreateWithRecordId
from argilla.server.schemas.v1.datasets import DatasetCreate, FieldCreate, QuestionCreate, RecordsCreate
from argilla.server.schemas.v1.datasets import (
DatasetCreate,
FieldCreate,
MetadataPropertyCreate,
QuestionCreate,
RecordsCreate,
)
from argilla.server.schemas.v1.records import ResponseCreate
from argilla.server.schemas.v1.responses import ResponseUpdate
from argilla.server.search_engine import SearchEngine
Expand All @@ -51,14 +57,20 @@


async def get_dataset_by_id(
db: "AsyncSession", dataset_id: UUID, with_fields: bool = False, with_questions: bool = False
db: "AsyncSession",
dataset_id: UUID,
with_fields: bool = False,
with_questions: bool = False,
with_metadata_properties: bool = False,
) -> Dataset:
query = select(Dataset).filter_by(id=dataset_id)
options = []
if with_fields:
options.append(selectinload(Dataset.fields))
if with_questions:
options.append(selectinload(Dataset.questions))
if with_metadata_properties:
options.append(selectinload(Dataset.metadata_properties))
if options:
query = query.options(*options)
result = await db.execute(query)
Expand Down Expand Up @@ -183,6 +195,13 @@ async def get_question_by_name_and_dataset_id(db: "AsyncSession", name: str, dat
return result.scalar_one_or_none()


async def get_metadata_property_by_name_and_dataset_id(
    db: "AsyncSession", name: str, dataset_id: UUID
) -> Union[MetadataProperty, None]:
    """Look up a metadata property by its name within a given dataset.

    Args:
        db: Async database session used to run the query.
        name: Name of the metadata property to look for.
        dataset_id: Identifier of the dataset the property must belong to.

    Returns:
        The matching `MetadataProperty`, or `None` when no row matches.
    """
    statement = select(MetadataProperty).filter_by(name=name, dataset_id=dataset_id)
    rows = await db.execute(statement)
    return rows.scalar_one_or_none()


async def create_question(db: "AsyncSession", dataset: Dataset, question_create: QuestionCreate) -> Question:
if dataset.is_ready:
raise ValueError("Question cannot be created for a published dataset")
Expand All @@ -198,6 +217,20 @@ async def create_question(db: "AsyncSession", dataset: Dataset, question_create:
)


async def create_metadata_property(
    db: "AsyncSession", dataset: Dataset, metadata_prop_create: MetadataPropertyCreate
) -> MetadataProperty:
    """Create a metadata property attached to `dataset`.

    Args:
        db: Async database session passed through to `MetadataProperty.create`.
        dataset: Dataset that will own the new property (only its `id` is used).
        metadata_prop_create: Validated creation payload (name, description, settings).

    Returns:
        Whatever `MetadataProperty.create` returns for the new property.
    """
    property_settings = metadata_prop_create.settings

    # The property type is taken from the settings' `type` field and passed
    # alongside the full settings dict.
    return await MetadataProperty.create(
        db,
        name=metadata_prop_create.name,
        type=property_settings.type,
        description=metadata_prop_create.description,
        settings=property_settings.dict(),
        dataset_id=dataset.id,
    )


async def update_question(db: "AsyncSession", question: Question, question_update: "QuestionUpdate") -> Question:
params = question_update.dict(exclude_unset=True)
return await question.update(db, **params)
Expand Down
9 changes: 9 additions & 0 deletions src/argilla/server/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,15 @@ async def is_allowed(actor: User) -> bool:

return is_allowed

@classmethod
def create_metadata_property(cls, dataset: Dataset):
    """Build the authorization check for creating a metadata property in `dataset`.

    The returned coroutine function allows owners unconditionally, and admins
    only when the workspace-membership helper reports that they belong to the
    dataset's workspace.
    """

    async def is_allowed(actor: User) -> bool:
        if actor.is_owner:
            return True
        if not actor.is_admin:
            return False
        # Admins are only allowed inside workspaces they are a member of.
        return await _exists_workspace_user_by_user_and_workspace_id(actor, dataset.workspace_id)

    return is_allowed


class FieldPolicyV1:
@classmethod
Expand Down
59 changes: 57 additions & 2 deletions src/argilla/server/schemas/v1/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
except ImportError:
from typing_extensions import Annotated

from argilla.server.enums import FieldType
from argilla.server.models import DatasetStatus, QuestionSettings, QuestionType, ResponseStatus
from argilla.server.enums import DatasetStatus, FieldType, MetadataPropertyType
from argilla.server.models import QuestionSettings, QuestionType, ResponseStatus

DATASET_NAME_REGEX = r"^(?!-|_)[a-zA-Z0-9-_ ]+$"
DATASET_NAME_MIN_LENGTH = 1
Expand All @@ -52,6 +52,12 @@
QUESTION_CREATE_DESCRIPTION_MIN_LENGTH = 1
QUESTION_CREATE_DESCRIPTION_MAX_LENGTH = 1000

# Validation limits applied when creating a metadata property.
# The name may only contain lowercase letters, digits, `_` and `-`, and the
# lookahead requires at least one lowercase letter or digit to be present.
METADATA_PROPERTY_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$"
# Allowed length range (in characters) for the property name.
METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH = 1
METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH = 50
# Allowed length range (in characters) for the optional description.
METADATA_PROPERTY_CREATE_DESCRIPTION_MIN_LENGTH = 1
METADATA_PROPERTY_CREATE_DESCRIPTION_MAX_LENGTH = 1000

RATING_OPTIONS_MIN_ITEMS = 2
RATING_OPTIONS_MAX_ITEMS = 10

Expand Down Expand Up @@ -415,6 +421,55 @@ class RecordsCreate(BaseModel):
items: conlist(item_type=RecordCreate, min_items=RECORDS_CREATE_MIN_ITEMS, max_items=RECORDS_CREATE_MAX_ITEMS)


# Settings variant for metadata properties whose `type` is `terms`.
# Only the discriminating `type` field is declared for this variant.
# (A comment is used rather than a docstring so the exported schema is unchanged.)
class TermsMetadataProperty(BaseModel):
    type: Literal[MetadataPropertyType.terms]


# Settings variant for metadata properties whose `type` is `integer`.
# Only the discriminating `type` field is declared for this variant.
# (A comment is used rather than a docstring so the exported schema is unchanged.)
class IntegerMetadataProperty(BaseModel):
    type: Literal[MetadataPropertyType.integer]


# Settings variant for metadata properties whose `type` is `float`.
# Only the discriminating `type` field is declared for this variant.
# (A comment is used rather than a docstring so the exported schema is unchanged.)
class FloatMetadataProperty(BaseModel):
    type: Literal[MetadataPropertyType.float]


# Union of the per-type settings models for a metadata property.
# The `type` field is declared as the discriminator, so the value of `type`
# selects which of the three settings models is used for validation.
MetadataPropertySettings = Annotated[
Union[TermsMetadataProperty, IntegerMetadataProperty, FloatMetadataProperty],
PydanticField(..., discriminator="type"),
]


# Request payload for creating a metadata property.
# (Comments are used rather than a docstring so the exported schema is unchanged.)
class MetadataPropertyCreate(BaseModel):
    # Required; must match METADATA_PROPERTY_CREATE_NAME_REGEX and respect the
    # METADATA_PROPERTY_CREATE_NAME_* length limits.
    name: constr(
        regex=METADATA_PROPERTY_CREATE_NAME_REGEX,
        min_length=METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH,
        max_length=METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH,
    )

    # Optional free text; when given it must respect the
    # METADATA_PROPERTY_CREATE_DESCRIPTION_* length limits.
    description: Optional[
        constr(
            min_length=METADATA_PROPERTY_CREATE_DESCRIPTION_MIN_LENGTH,
            max_length=METADATA_PROPERTY_CREATE_DESCRIPTION_MAX_LENGTH,
        )
    ] = None

    # Required; type-specific settings selected through their `type` field.
    settings: MetadataPropertySettings


# Response schema describing a single metadata property.
# (Comments are used rather than a docstring so the exported schema is unchanged.)
class MetadataProperty(BaseModel):
    id: UUID
    name: str
    description: Optional[str] = None
    settings: MetadataPropertySettings
    inserted_at: datetime
    updated_at: datetime

    class Config:
        # Lets the schema be populated from objects exposing these fields as
        # attributes (e.g. ORM model instances), not only from dicts.
        orm_mode = True


# Response schema wrapping a list of metadata properties under `items`.
# (A comment is used rather than a docstring so the exported schema is unchanged.)
class MetadataProperties(BaseModel):
    items: List[MetadataProperty]


class SearchRecordsQuery(BaseModel):
query: Query

Expand Down
27 changes: 26 additions & 1 deletion tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
import inspect

import factory
from argilla.server.enums import FieldType
from argilla.server.enums import FieldType, MetadataPropertyType
from argilla.server.models import (
Dataset,
Field,
MetadataProperty,
Question,
QuestionType,
Record,
Expand Down Expand Up @@ -237,6 +238,30 @@ class TextFieldFactory(FieldFactory):
settings = {"type": FieldType.text.value, "use_markdown": False}


# Base test factory for `MetadataProperty` rows. It does not set `type` or
# `settings`; the type-specific subclasses declare those.
class MetadataPropertyFactory(BaseFactory):
    class Meta:
        model = MetadataProperty

    # Sequence-based so that every generated property gets a distinct name.
    name = factory.Sequence(lambda n: f"metadata-property-{n}")
    description = "Metadata property description"
    # The owning dataset is produced through DatasetFactory.
    dataset = factory.SubFactory(DatasetFactory)


# Test factory producing metadata properties of type `terms`; the same type is
# written both to the `type` attribute and to the settings dict.
class TermsMetadataPropertyFactory(MetadataPropertyFactory):
    type = MetadataPropertyType.terms
    settings = {"type": MetadataPropertyType.terms}


# Test factory producing metadata properties of type `integer`; the same type
# is written both to the `type` attribute and to the settings dict.
class IntegerMetadataPropertyFactory(MetadataPropertyFactory):
    type = MetadataPropertyType.integer
    settings = {"type": MetadataPropertyType.integer}


# Test factory producing metadata properties of type `float`; the same type is
# written both to the `type` attribute and to the settings dict.
class FloatMetadataPropertyFactory(MetadataPropertyFactory):
    type = MetadataPropertyType.float
    settings = {"type": MetadataPropertyType.float}


class QuestionFactory(BaseFactory):
class Meta:
model = Question
Expand Down
Loading

0 comments on commit ac5c987

Please sign in to comment.