Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script for collecting task files paths. #32

Merged
merged 24 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions jba/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,25 @@ docker run hyperstyle-analysis-prod:<VERSION> poetry run process_course_data [ar

After this step you will get a new file with course data with `courseId_preprocessed` suffix.


3. [tasktracker_content_collector.py](src/processing/tasktracker_content_collector.py) collects data from the course sources and saves it in the TaskTracker task content file format.
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved

### Usage

Execute one of the following commands with necessary arguments:
```bash
poetry run tasktracker_content_collector [arguments]
```
or
```bash
docker run hyperstyle-analysis-prod:<VERSION> poetry run tasktracker_content_collector [arguments]
```
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved

**Required arguments**:

- `destination_path` — Path to the directory where the YAML file will be created.
- `course_sources_path` — Path to the course sources from which the course structure is extracted.

----

# Simple general statistics
Expand Down
8 changes: 8 additions & 0 deletions jba/src/models/edu_structure.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import List, Optional


Expand All @@ -16,3 +17,10 @@ class EduStructureNode:
name: str
structure_type: EduStructureType
children: Optional[List['EduStructureNode']]


@dataclass(frozen=True)
class EduLesson:
    """An immutable description of a single lesson of a course."""

    # Directory of the lesson.
    root: Path
    # Whether the lesson is a framework lesson.
    is_framework: bool
    # Names of the lesson's children (presumably its task directories — confirm against the info file schema).
    children: List[str]
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved
162 changes: 162 additions & 0 deletions jba/src/processing/tasktracker_content_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import re
from os import listdir

import argparse
from pathlib import Path

import yaml
from core.src.utils.file.extension_utils import AnalysisExtension
from core.src.utils.file.yaml_utils import read_yaml_field_content
from jba.src.models.edu_structure import EduStructureType, EduLesson

# Names of the fields that are read from the `*-info` yaml files of a course.
CONTENT_META_FIELD = 'content'
FILES_META_FIELD = 'files'
VISIBLE_META_FIELD = 'visible'
TYPE_META_FIELD = 'type'
NAME_META_FIELD = 'name'

# Name of the yaml file that is created in the destination directory.
CONTENT_FILE_NAME = 'task_content_default.yaml'

# Value of the `type` field that marks a lesson as a framework lesson.
FRAMEWORK_TYPE = 'framework'

# Directory name that is used instead of the task directory name for tasks of framework lessons.
TASK_DIRECTORY_NAME = 'task'

# Matches `<structure type>-info<yaml extension>` file names; group 1 is the structure type.
# The extension is escaped so that its characters (presumably a leading dot — confirm against
# AnalysisExtension) are matched literally instead of being interpreted as regex syntax.
INFO_FILE_REGEX = re.compile(rf'([a-z]+)-info{re.escape(AnalysisExtension.YAML.value)}')

# Maps a file suffix (without the leading dot) to the extension name that is written to the output.
EXTENSIONS = {'py': 'PYTHON', 'ipynb': 'JUPYTER', 'java': 'JAVA', 'kt': 'KOTLIN', 'cpp': 'CPP', 'csv': 'CSV'}


class TaskTrackerFile:
    """A course file described by its directory, its name without the suffix and its extension name."""

    def __init__(self, rel_path: Path):
        """
        Split ``rel_path`` into the parts that are stored in the task content file.

        :param rel_path: path to the file; callers in this module pass a path relative to the course root.
        """
        self.path = rel_path.parent
        self.name = rel_path.stem
        # A suffix that is not listed in EXTENSIONS (including a missing suffix) is reported as 'NO_EXTENSION'.
        self.extension = EXTENSIONS.get(rel_path.suffix.lstrip('.'), 'NO_EXTENSION')

    def as_dict(self) -> dict:
        """Return the file as a dictionary entry of the task content file."""
        return {
            'filename': self.name,
            'relativePath': str(self.path),
            'extension': self.extension,
            'sourceSet': 'SRC',
            'isInternal': False,
        }

    def __eq__(self, other):
        """Two files are equal when their name, directory and extension are equal."""
        if not isinstance(other, TaskTrackerFile):
            # Let Python try the reflected comparison instead of claiming inequality ourselves.
            return NotImplemented
        return (self.name, self.path, self.extension) == (other.name, other.path, other.extension)

    def __hash__(self):
        """Hash the same fields that take part in the equality check."""
        return hash((self.name, self.path, self.extension))


def get_data_template(files: list) -> dict:
    """Wrap ``files`` into the task content structure that consists of a single placeholder task."""
    placeholder_task = dict(name='example', description='description', id='main', files=files)
    return dict(tasks=[placeholder_task])


def flatten(paths: list) -> list:
    """Return the items of arbitrarily nested lists as one flat list; ``None`` items are dropped."""

    def walk(items):
        for item in items:
            if isinstance(item, list):
                yield from walk(item)
            elif item is not None:
                yield item

    return list(walk(paths))


def get_info_file(root: Path) -> str:
    """
    Find the info file of the course structure element that is stored in ``root``.

    :param root: directory of a course structure element.
    :return: name of the info file (not the full path).
    :raises FileNotFoundError: if ``root`` does not contain an info file.
    """
    # The whole name must match, so that files like `lesson-info.yaml.bak` are not picked up.
    # listdir returns names in arbitrary order, therefore sort them to make the choice reproducible.
    info_files = sorted(file_name for file_name in listdir(root) if INFO_FILE_REGEX.fullmatch(file_name))
    if not info_files:
        raise FileNotFoundError(f'There is no info file in the directory: {root}')

    return info_files[0]
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved


def get_lessons(root: Path):
    """
    Collect the lessons that are located in ``root``.

    :param root: directory of a course structure element.
    :return: an EduLesson if ``root`` is a lesson, None if ``root`` is a task or its info file has no content,
        otherwise a flat list of the lessons found in the children that are listed in the info file.
    :raises ValueError: if the info file name denotes a structure type that EduStructureType does not accept.
    """
    info_file = get_info_file(root)
    # Group 1 of the info file name is the structure type.
    structure_type = EduStructureType(INFO_FILE_REGEX.match(info_file).group(1))

    # Tasks do not contain lessons.
    if structure_type == EduStructureType.TASK:
        return None

    info_file_path = root / info_file
    content = read_yaml_field_content(info_file_path, CONTENT_META_FIELD)

    if structure_type == EduStructureType.LESSON:
        lesson_type = read_yaml_field_content(info_file_path, TYPE_META_FIELD)
        # A lesson without the content field has no children; an empty list keeps EduLesson.children iterable.
        return EduLesson(root, lesson_type == FRAMEWORK_TYPE, content if content is not None else [])

    if content is None:
        return None

    return flatten([get_lessons(root / name) for name in content])


def get_files(root: Path, lesson: EduLesson) -> list:
    """Collect the files of all children of ``lesson``; file paths are built relative to ``root``."""
    lesson_relative_path = lesson.root.relative_to(root)

    files_per_child = []
    for child_name in lesson.children:
        child_files = get_task_files(lesson.root / child_name, lesson_relative_path, lesson.is_framework)
        files_per_child.append(child_files)

    return flatten(files_per_child)
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved


def get_task_files(root: Path, relative_path: Path, is_framework: bool):
    """
    Collect the visible files that are listed in the info file of the task stored in ``root``.

    :param root: directory of the task.
    :param relative_path: path that is prepended to the task directory name in the resulting file paths.
    :param is_framework: if true, TASK_DIRECTORY_NAME is used instead of the name of the task directory.
    :return: list of TaskTrackerFile objects.
    """
    info_file = get_info_file(root)

    listed_files = read_yaml_field_content(root / info_file, FILES_META_FIELD)
    if listed_files is None:
        listed_files = []
    visible_files = [listed_file for listed_file in listed_files if listed_file[VISIBLE_META_FIELD]]

    task_directory_name = TASK_DIRECTORY_NAME if is_framework else root.name

    return [
        TaskTrackerFile(relative_path / task_directory_name / visible_file[NAME_META_FIELD])
        for visible_file in visible_files
    ]


def get_yaml_content(course_root: Path) -> dict:
    """
    Build the content of the task content file for the course stored in ``course_root``.

    :param course_root: directory with the course sources.
    :return: dictionary that is ready to be dumped to the yaml file.
    """
    lessons = get_lessons(course_root)
    # get_lessons returns None when nothing was found and a single EduLesson when the root itself is a lesson.
    if lessons is None:
        lessons = []
    elif isinstance(lessons, EduLesson):
        lessons = [lessons]

    # dict.fromkeys drops duplicates like a set does, but keeps the order in which the files were found,
    # so the produced file does not change from run to run.
    unique_files = dict.fromkeys(file for lesson in lessons for file in get_files(course_root, lesson))

    return get_data_template([file.as_dict() for file in unique_files])


def configure_parser(parser: argparse.ArgumentParser) -> None:
    """Register the positional arguments of the script; both values are converted to absolute paths."""
    positional_arguments = (
        ('destination_path', 'Path to directory where yaml file will be created'),
        ('course_sources_path', 'Path to course sources to extract course structure.'),
    )

    for argument_name, description in positional_arguments:
        parser.add_argument(
            argument_name,
            type=lambda value: Path(value).absolute(),
            help=description,
        )


def save_yaml_file(yaml_content: dict, path: Path):
    """
    Dump ``yaml_content`` to the yaml file ``path``.

    :param yaml_content: data to dump.
    :param path: path to the file to create; an existing file is overwritten.
    """
    # Create the destination directory if needed instead of failing on a path that does not exist yet.
    path.parent.mkdir(parents=True, exist_ok=True)
    # The encoding is set explicitly so that the result does not depend on the locale of the machine.
    with open(path, 'w', encoding='utf-8') as file:
        yaml.dump(yaml_content, file)
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved


def main():
    """Parse the command line arguments, collect the course files and save them to the task content file."""
    argument_parser = argparse.ArgumentParser()
    configure_parser(argument_parser)
    arguments = argument_parser.parse_args()

    collected_content = get_yaml_content(arguments.course_sources_path)
    output_file = arguments.destination_path / CONTENT_FILE_NAME
    save_yaml_file(collected_content, output_file)


# Run the collector only when the module is executed as a script.
if __name__ == '__main__':
    main()
50 changes: 50 additions & 0 deletions jba/tests/processing/test_tasktracker_content_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import sys
from pathlib import Path

from core.src.utils.subprocess_runner import run_in_subprocess
from jba.src import MAIN_FOLDER

from jba.tests.processing import PROCESSING_FOLDER

import tempfile

import yaml

from deepdiff import DeepDiff # noqa: SC200
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved


def test_incorrect_arguments():
    """The script run without arguments must print nothing to stdout and report the missing arguments."""
    script_path = MAIN_FOLDER.parent / 'processing' / 'tasktracker_content_collector.py'

    stdout, stderr = run_in_subprocess([sys.executable, script_path])

    assert stdout == ''
    assert 'error: the following arguments are required' in stderr


def yaml_as_dict(my_file):
    """
    Load all documents of the yaml file ``my_file`` and merge their top-level keys into one dictionary.

    If a key occurs in several documents, the value from the last document wins.
    """
    result = {}
    with open(my_file, 'r', encoding='utf-8') as yaml_file:
        for doc in yaml.safe_load_all(yaml_file):
            # An empty document is loaded as None and has nothing to merge.
            if doc is None:
                continue
            for key, value in doc.items():
                result[key] = value
    return result
mikrise2 marked this conversation as resolved.
Show resolved Hide resolved


def test_correct_arguments():
    """The script must silently create exactly one file whose content equals the expected result."""
    script_path = MAIN_FOLDER.parent / 'processing' / 'tasktracker_content_collector.py'
    prepare_course_directory = PROCESSING_FOLDER / 'prepare_course_data'
    expected_file = prepare_course_directory / 'expected_tasktracker_result.yaml'

    with tempfile.TemporaryDirectory() as temp_directory:
        stdout, stderr = run_in_subprocess([
            sys.executable,
            script_path,
            Path(temp_directory),
            prepare_course_directory / 'course_with_section',
        ])
        assert stdout == ''
        assert stderr == ''

        assert len(os.listdir(temp_directory)) == 1
        actual_file = Path(temp_directory) / 'task_content_default.yaml'
        assert actual_file.exists()

        # The order of the collected files is not important, only their set is.
        assert not DeepDiff(yaml_as_dict(actual_file), yaml_as_dict(expected_file), ignore_order=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
tasks:
- description: description
files:
- extension: KOTLIN
filename: Main
isInternal: false
relativePath: section/lesson/task1/src/main/kotlin
sourceSet: SRC
- extension: KOTLIN
filename: Main
isInternal: false
relativePath: section/lesson/task3/src/main/kotlin
sourceSet: SRC
- extension: KOTLIN
filename: Main
isInternal: false
relativePath: section/lesson/task2/src/main/kotlin
sourceSet: SRC
id: main
name: example
50 changes: 48 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading