Skip to content

Commit

Permalink
update upload_training_files format (#613)
Browse files Browse the repository at this point in the history
* update upload_training_files format

Signed-off-by: Yue, Wenjiao <[email protected]>
  • Loading branch information
WenjiaoYue authored Sep 4, 2024
1 parent 9007212 commit 3367b76
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 28 deletions.
36 changes: 36 additions & 0 deletions comps/cores/proto/api_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,3 +761,39 @@ class FineTuningJobList(BaseModel):
If true, additional requests can be made to retrieve more jobs.
"""


class UploadFileRequest(BaseModel):
    """Request model for a file upload: the declared purpose plus the file itself."""

    purpose: str
    """The intended purpose of the uploaded file.
    Use "assistants" for Assistants and Message files, "vision" for Assistants image file inputs, "batch" for Batch API, and "fine-tune" for Fine-tuning.
    """

    file: UploadFile
    """The File object (not file name) to be uploaded."""


class FileObject(BaseModel):
    """Metadata describing an uploaded file, modelled on the OpenAI File object."""

    # Ordered by official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/files/object
    id: str
    """The file identifier, which can be referenced in the API endpoints."""

    bytes: int
    """The size of the file, in bytes."""

    created_at: int
    """The Unix timestamp (in seconds) for when the file was created."""

    filename: str
    """The name of the file."""

    object: str = "file"
    """The object type, which is always file."""

    purpose: str
    """The intended purpose of the file.
    Supported values are assistants, assistants_output, batch, batch_output, fine-tune, fine-tune-results and vision.
    """
2 changes: 1 addition & 1 deletion comps/finetuning/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Assuming a training file `alpaca_data.json` is uploaded, it can be downloaded in
```bash
# upload a training file

curl http://${your_ip}:8015/v1/finetune/upload_training_files -X POST -H "Content-Type: multipart/form-data" -F "files=@./alpaca_data.json"
curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune"

# create a finetuning job
curl http://${your_ip}:8015/v1/fine_tuning/jobs \
Expand Down
30 changes: 8 additions & 22 deletions comps/finetuning/finetuning_service.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import urllib.parse
from typing import List, Optional, Union

from fastapi import BackgroundTasks, File, UploadFile
from fastapi import BackgroundTasks, Depends

from comps import opea_microservices, register_microservice
from comps.cores.proto.api_protocol import FineTuningJobIDRequest
from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest
from comps.finetuning.finetune_config import FineTuningParams
from comps.finetuning.handlers import (
DATASET_BASE_PATH,
handle_cancel_finetuning_job,
handle_create_finetuning_jobs,
handle_list_finetuning_checkpoints,
handle_list_finetuning_jobs,
handle_retrieve_finetuning_job,
save_content_to_local_disk,
handle_upload_training_files,
upload_file,
)


Expand Down Expand Up @@ -51,22 +46,13 @@ def cancel_finetuning_job(request: FineTuningJobIDRequest):

@register_microservice(
name="opea_service@finetuning",
endpoint="/v1/finetune/upload_training_files",
endpoint="/v1/files",
host="0.0.0.0",
port=8015,
)
async def upload_training_files(
files: Optional[Union[UploadFile, List[UploadFile]]] = File(None),
):
if files:
if not isinstance(files, list):
files = [files]
for file in files:
filename = urllib.parse.quote(file.filename, safe="")
save_path = os.path.join(DATASET_BASE_PATH, filename)
await save_content_to_local_disk(save_path, file)

return {"status": 200, "message": "Training files uploaded."}
async def upload_training_files(request: UploadFileRequest = Depends(upload_file)):
    """Handle a file upload by delegating to ``handle_upload_training_files``.

    ``request`` is supplied by the ``upload_file`` dependency; the handler's
    result is returned unchanged.
    """
    return await handle_upload_training_files(request)


@register_microservice(
Expand Down
37 changes: 35 additions & 2 deletions comps/finetuning/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,24 @@
import os
import random
import time
import urllib.parse
import uuid
from pathlib import Path
from typing import Dict

from fastapi import BackgroundTasks, HTTPException
from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile
from pydantic_yaml import parse_yaml_raw_as, to_yaml_file
from ray.job_submission import JobSubmissionClient

from comps import CustomLogger
from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList
from comps.cores.proto.api_protocol import (
FileObject,
FineTuningJob,
FineTuningJobIDRequest,
FineTuningJobList,
FineTuningJobsRequest,
UploadFileRequest,
)
from comps.finetuning.finetune_config import FinetuneConfig, FineTuningParams

logger = CustomLogger("finetuning_handlers")
Expand Down Expand Up @@ -185,3 +193,28 @@ def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest):
if os.path.exists(output_dir):
checkpoints = os.listdir(output_dir)
return checkpoints


async def upload_file(purpose: str = Form(...), file: UploadFile = File(...)):
    """Bundle the required ``purpose`` form field and uploaded ``file`` into an ``UploadFileRequest``."""
    upload_request = UploadFileRequest(purpose=purpose, file=file)
    return upload_request


async def handle_upload_training_files(request: UploadFileRequest):
    """Save an uploaded file under ``DATASET_BASE_PATH`` and describe it.

    Args:
        request: Upload request carrying the file and its declared purpose.

    Returns:
        A ``FileObject`` with a fresh ``file-<uuid4>`` id, the on-disk size in
        bytes, the creation time as a Unix timestamp in seconds, the stored
        (percent-encoded) filename, and the purpose declared in the request.

    Raises:
        HTTPException: With status 404 when the request carries no file.
    """
    file = request.file
    if file is None:
        raise HTTPException(status_code=404, detail="upload file failed!")

    # safe="" percent-encodes every reserved character, including "/", so the
    # client-supplied name cannot add path separators to the save path.
    filename = urllib.parse.quote(file.filename, safe="")
    save_path = os.path.join(DATASET_BASE_PATH, filename)
    await save_content_to_local_disk(save_path, file)

    return FileObject(
        id=f"file-{uuid.uuid4()}",
        object="file",
        bytes=os.path.getsize(save_path),
        created_at=int(time.time()),
        filename=filename,
        # Echo the purpose the client declared instead of a hard-coded value,
        # so the returned metadata matches the request.
        purpose=request.purpose,
    )
15 changes: 12 additions & 3 deletions tests/test_finetuning.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,30 @@ function validate_microservice() {
export no_proxy="localhost,127.0.0.1,"${ip_address}

# test /v1/files upload file
URL="http://${ip_address}:$finetuning_service_port/v1/finetune/upload_training_files"
URL="http://${ip_address}:$finetuning_service_port/v1/files"
echo '[{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. 
Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. 
Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."}]' > $LOG_PATH/test_data.json
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./test_data.json' -H 'Content-Type: multipart/form-data' "$URL")
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL")
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
SERVICE_NAME="finetuning-server - upload - file"

# Parse the JSON response
purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose')
filename=$(echo "$RESPONSE_BODY" | jq -r '.filename')

# Define expected values
expected_purpose="fine-tune"
expected_filename="test_data.json"

if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
if [[ "$RESPONSE_BODY" != *"Training files uploaded"* ]]; then
# Check if the parsed values match the expected values
if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log
exit 1
Expand Down

0 comments on commit 3367b76

Please sign in to comment.