From 44d0a4f1e87c2b6db4b68d44ba6b5bb705610c01 Mon Sep 17 00:00:00 2001 From: Kevin Meinhardt Date: Mon, 16 Sep 2024 18:41:18 +0200 Subject: [PATCH] Remove initialize_docker: Integrate data initialization to make up Add Args for init TMP: clean up TMP: update command decsription --- .github/actions/run-docker/action.yml | 16 ++ Makefile-docker | 49 +--- Makefile-os | 39 ++- docker-compose.yml | 7 + docs/topics/development/data_management.md | 26 +- docs/topics/development/makefile_commands.md | 10 +- .../development/setup_and_configuration.md | 55 ++--- .../amo/management/commands/create_db.py | 57 ----- .../management/commands/initialize_data.py | 201 ++++++++++++++++ src/olympia/amo/tests/test_commands.py | 226 ++++++++++++++++++ 10 files changed, 523 insertions(+), 163 deletions(-) delete mode 100644 src/olympia/amo/management/commands/create_db.py create mode 100644 src/olympia/amo/management/commands/initialize_data.py diff --git a/.github/actions/run-docker/action.yml b/.github/actions/run-docker/action.yml index 5bddffd8213..b9ad240030d 100644 --- a/.github/actions/run-docker/action.yml +++ b/.github/actions/run-docker/action.yml @@ -20,6 +20,18 @@ inputs: description: 'The docker-compose file to use' required: false default: 'docker-compose.yml:docker-compose.ci.yml' + init_force_db: + description: 'Force the initialization of the database' + required: false + default: '' + init_skip_seed: + description: 'Skip the seeding of the database' + required: false + default: 'true' + init_skip_index: + description: 'Skip the indexing of the database' + required: false + default: 'true' runs: using: 'composite' steps: @@ -36,6 +48,10 @@ runs: COMPOSE_FILE: ${{ inputs.compose_file }} DOCKER_SERVICES: ${{ inputs.services }} HOST_UID: ${{ steps.id.outputs.id }} + INIT_FORCE_DB: ${{ inputs.init_force_db }} + INIT_SKIP_SEED: ${{ inputs.init_skip_seed }} + INIT_SKIP_INDEX: ${{ inputs.init_skip_index }} + run: | # Start the specified services make up diff --git 
a/Makefile-docker b/Makefile-docker index fbc31b3051b..5c75e534ff5 100644 --- a/Makefile-docker +++ b/Makefile-docker @@ -52,41 +52,6 @@ check_django: ## check if the django app is configured properly .PHONY: check check: check_files check_olympia_user check_debian_packages check_pip_packages check_django -.PHONY: initialize_db -initialize_db: ## create a new database - rm -rf ./user-media/* ./tmp/* - $(PYTHON_COMMAND) manage.py create_db --force - $(PYTHON_COMMAND) manage.py migrate --noinput - $(PYTHON_COMMAND) manage.py loaddata initial.json - $(PYTHON_COMMAND) manage.py import_prod_versions - # The superuser needs to have a mozilla.com address for admin tools access - $(PYTHON_COMMAND) manage.py createsuperuser \ - --no-input \ - --username "local_admin" \ - --email "local_admin@mozilla.com" - $(PYTHON_COMMAND) manage.py loaddata zadmin/users - -.PHONY: reindex_data -reindex_data: ## reindex the data in elasticsearch - $(PYTHON_COMMAND) manage.py reindex --force --noinput - -.PHONY: populate_data -populate_data: ## populate a new database - # reindex --wipe will force the ES mapping to be re-installed. Useful to - # make sure the mapping is correct before adding a bunch of add-ons. - $(PYTHON_COMMAND) manage.py reindex --wipe --force --noinput - $(PYTHON_COMMAND) manage.py generate_addons --app firefox $(NUM_ADDONS) - $(PYTHON_COMMAND) manage.py generate_addons --app android $(NUM_ADDONS) - $(PYTHON_COMMAND) manage.py generate_themes $(NUM_THEMES) - # These add-ons are specifically useful for the addons-frontend - # homepage. You may have to re-run this, in case the data there - # changes. 
- $(PYTHON_COMMAND) manage.py generate_default_addons_for_frontend - -.PHONY: update_db -update_db: ## run the database migrations - $(PYTHON_COMMAND) manage.py migrate --noinput - .PHONY: update_assets update_assets: # Copy files required in compress_assets to the static folder @@ -96,13 +61,6 @@ update_assets: # Collect static files: This MUST be run last or files will be missing $(PYTHON_COMMAND) manage.py collectstatic --noinput -.PHONY: update -update: update_db update_assets ## update the dependencies, the database, and assets - -.PHONY: reindex -reindex: ## reindex everything in elasticsearch, for AMO - $(PYTHON_COMMAND) manage.py reindex $(ARGS) - .PHONY: setup-ui-tests setup-ui-tests: rm -rf ./user-media/* ./tmp/* @@ -156,8 +114,11 @@ djshell: ## connect to django shell dbshell: ## connect to a database shell $(PYTHON_COMMAND) ./manage.py dbshell -.PHONY: initialize -initialize: initialize_db update_assets populate_data reindex_data ## init the dependencies, the database, and assets +.PHONY: initialize_data +initialize_data: ## ensure database exists + @echo "Initializing data..." 
+ @echo "args: $(ARGS)" + $(PYTHON_COMMAND) ./manage.py initialize_data $(ARGS) reload-uwsgi: reload diff --git a/Makefile-os b/Makefile-os index 259f00614f5..20d37c8923d 100644 --- a/Makefile-os +++ b/Makefile-os @@ -13,11 +13,30 @@ export DOCKER_COMMIT ?= export DOCKER_BUILD ?= export DOCKER_VERSION ?= override DOCKER_MYSQLD_VOLUME = addons-server_data_mysqld +override DOCKER_COMPOSE_EXEC = docker compose exec +override DOCKER_COMPOSE_EXEC_WEB = $(DOCKER_COMPOSE_EXEC) --user olympia web override BACKUPS_DIR = $(shell pwd)/backups override EXPORT_DIR = $(BACKUPS_DIR)/$(shell date +%Y%m%d%H%M%S) RESTORE_DIR ?= $(BACKUPS_DIR)/$(shell ls -1 backups | sort -r | head -n 1) +INITIALIZE_ARGS ?= +INIT_FORCE_DB ?= +INIT_SKIP_SEED ?= +INIT_SKIP_INDEX ?= + +ifneq ($(INIT_FORCE_DB),) + INITIALIZE_ARGS += --force-db +endif + +ifneq ($(INIT_SKIP_SEED),) + INITIALIZE_ARGS += --skip-seed +endif + +ifneq ($(INIT_SKIP_INDEX),) + INITIALIZE_ARGS += --skip-index +endif + DOCKER_BAKE_ARGS := \ --file docker-bake.hcl \ --file .env \ @@ -70,29 +89,29 @@ update_docker: data_export up data_restore ## update all the docker images .PHONY: shell shell: ## connect to a running addons-server docker shell - docker compose exec --user olympia web bash + $(DOCKER_COMPOSE_EXEC_WEB) bash .PHONY: rootshell rootshell: ## connect to a running addons-server docker shell with root user - docker compose exec --user root web bash + $(DOCKER_COMPOSE_EXEC) --user root web bash .PHONY: data_export data_export: @ mkdir -p $(EXPORT_DIR) # Extracting mysql database - docker compose exec mysqld /usr/bin/mysqldump olympia > $(EXPORT_DIR)/data_mysqld.sql + $(DOCKER_COMPOSE_EXEC) mysqld /usr/bin/mysqldump olympia > $(EXPORT_DIR)/data_mysqld.sql .PHONY: data_restore data_restore: @[ -d $(RESTORE_DIR) ] || (echo "Directory $(RESTORE_DIR) does not exist" && exit 1) # Wait for MySQL server to be ready - docker compose exec mysqld bash \ + $(DOCKER_COMPOSE_EXEC) mysqld bash \ -c 'while ! 
mysqladmin ping --silent; do echo "waiting"; sleep 1; done' # Restoring mysql database - docker compose exec -T mysqld /usr/bin/mysql olympia < $(RESTORE_DIR)/data_mysqld.sql + $(DOCKER_COMPOSE_EXEC) -T mysqld /usr/bin/mysql olympia < $(RESTORE_DIR)/data_mysqld.sql $(MAKE) reindex_data @@ -166,16 +185,16 @@ docker_compose_up: docker_mysqld_volume_create ## Start the docker containers .PHONY: up up: setup docker_pull_or_build docker_compose_up docker_clean_images docker_clean_volumes ## Create and start docker compose + # Explicitly run initialize via the web container as make can get confused + # both routing the command to the web container and + # routing the command to the proper target. + $(DOCKER_COMPOSE_EXEC_WEB) make -f Makefile-docker initialize_data ARGS=$(shell echo "'$(INITIALIZE_ARGS)'") .PHONY: down down: docker_compose_down docker_clean_images docker_clean_volumes ## Stop the docker containers and clean up non-peristent dangling resources -.PHONY: initialize_docker -initialize_docker: up - docker compose exec --user olympia web make initialize - %: ## This directs any other recipe (command) to the web container's make. - docker compose exec --user olympia web make $(MAKECMDGOALS) ARGS=$(ARGS) + $(DOCKER_COMPOSE_EXEC_WEB) make $(MAKECMDGOALS) ARGS=$(ARGS) # You probably want to put new commands in Makefile-docker, unless they operate # on multiple containers or are host-os specific. 
diff --git a/docker-compose.yml b/docker-compose.yml index 781bb10f80c..27f4b95b3d4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -68,6 +68,13 @@ services: test: ["CMD-SHELL", "DJANGO_SETTINGS_MODULE=olympia celery -A olympia.amo.celery status"] interval: 1s retries: 100 + depends_on: + - mysqld + - elasticsearch + - redis + - memcached + - rabbitmq + - autograph web: <<: *worker diff --git a/docs/topics/development/data_management.md b/docs/topics/development/data_management.md index 25d97eccd2e..85dcff1641b 100644 --- a/docs/topics/development/data_management.md +++ b/docs/topics/development/data_management.md @@ -12,26 +12,18 @@ The use of an external mount allows for manual management of the data lifecycle. ## Data Population -The `make initialize_docker` command handles initial data population, including creating the database, running migrations, and seeding the database. +When you run `make up` make will run the `initialize_data` command for you. This command will check if the database exists, and if the elasticsearch index exists. -If you already have running containers, you can just run `make initialize` to reset the database, populate data, and reindex. +If they don't exist it will create them. This command can be run manually as well. - **Database Initialization**: ```sh - make initialize_docker + make initialize_data ``` -- **Command Breakdown**: - - **`make up`**: Starts the Docker containers. - - **`make initialize`**: Runs database migrations and seeds the database with initial data. - -The `make initialize` command, executed as part of `make initialize_docker`, performs the following steps: - -1. **Create Database**: Sets up the initial database schema. -2. **Run Migrations**: Applies any pending database migrations. -3. **Seed Database**: Inserts initial data into the database. -4. **Reindex**: Rebuilds the search index in Elasticsearch. 
+This will create the database, run migrations, seed the database and create the index in elasticsearch. +If any of these steps have already been run, they will be skipped. ## Exporting and Loading Data Snapshots @@ -62,3 +54,11 @@ Refer to the Makefile for detailed instructions on these commands. This comprehensive setup ensures that the development environment is fully prepared with the necessary data. By following these practices, developers can manage data effectively in the **addons-server** project. The use of persistent volumes, external mounts, data snapshots, and automated data population ensures a robust and flexible data management strategy. For more detailed instructions, refer to the project's Makefile and Docker Compose configuration in the repository. + +- **Hard Reset Database**: + +In order to manually re-initialize the database you can run `make up` with `INIT_FORCE_DB` set, which passes the `--force-db` argument to the `initialize_data` command. This will delete the existing data. This will force recreate the database, seed it, and reindex. + +```bash +make up INIT_FORCE_DB=true +``` diff --git a/docs/topics/development/makefile_commands.md b/docs/topics/development/makefile_commands.md index f7b24809f5d..4b5a03e3be9 100644 --- a/docs/topics/development/makefile_commands.md +++ b/docs/topics/development/makefile_commands.md @@ -102,15 +102,7 @@ A common benefit of using Makefiles in this manner is the ability to coordinate make data_restore ``` -2. **`initialize_docker`**: - - **Purpose**: Sets up the initial Docker environment, including database initialization and data population. - - **Usage**: - - ```sh - make initialize_docker - ``` - -3. **`build_docker_image`**: +2. **`build_docker_image`**: - **Purpose**: Builds the Docker image using BuildKit and Bake. 
- **Usage**: diff --git a/docs/topics/development/setup_and_configuration.md b/docs/topics/development/setup_and_configuration.md index 36530e54f99..55dd7ea3048 100644 --- a/docs/topics/development/setup_and_configuration.md +++ b/docs/topics/development/setup_and_configuration.md @@ -15,49 +15,31 @@ Follow these steps to get started: cd addons-server ``` -(running-for-the-first-time)= -## Running for the first time - -When running the project for the first time, execute: - -```sh -make initialize_docker -``` - -This command will run: - -- `make up` to start the Docker containers. -- `make initialize` to set up the initial Docker environment, including database initialization and data population. -Detailed steps for `make initialize` will be covered in Section 6 on Data Management. - -If you run `make up` without running `make initialize` the docker compose services will be running, but you will not have a database -and the app might crash or otherwise be unusable. - -Similarly, you can run `make initialize` even after you have an up and running environment, but this will totally reset your database -as if you were running the application fresh. - -## Updating your environment +## Running the docker compose project > TLDR; Just run `make up`. The _make up_ command ensures all necessary files are created on the host and starts the Docker Compose project, -including volumes, containers, and networks. It is meant to be run frequently whenever you want to bring your environment "up". +including volumes, containers, networks, databases and indexes. +It is meant to be run frequently whenever you want to bring your environment "up". 
Here's a high-level overview of what _make up_ does: ```make .PHONY: up -up: setup docker_pull_or_build docker_compose_up docker_clean_images docker_clean_volumes ## Create and start docker compose +up: setup docker_pull_or_build docker_compose_up docker_clean_images docker_clean_volumes data ``` - **setup**: Creates configuration files such as `.env` and `version.json`. - **docker_pull_or_build**: Pulls or builds the Docker image based on the image version. - **docker_compose_up**: Starts the Docker containers defined in [docker-compose.yml][docker-compose]. - **docker_clean_images** and **docker_clean_volumes**: Cleans up unused Docker images and volumes. +- **data**: Runs `initialize_data` in the web container to ensure the database, seed, and index are created (in the Makefile this is a recipe step of `up`, not a separate target). -What happens if you run `make up` when your environment is already running? -This will result in all services and volumes being recreated as if starting them for the first time, -and will clear any local state from the containers. The `make up` command is {ref}`idempotent ` so you can run it over and over. +What happens if you run `make up` when your environment is already running? +Well, that depends on what has changed since the last time you ran it. +Because `make up` is {ref}`idempotent ` it will only run the commands that are necessary to bring your environment up to date. +If nothing has changed, nothing will happen because your environment is already in the desired state. ## Shutting down your environment @@ -70,6 +52,19 @@ Running `make down` will free up resources on your machine and can help if your A common solution to many problems is to run `make down && make up`. +> NOTE: When you run make down, it will clear all volumes except the data_mysqld volume. +> This is where your database and other persisted data is stored. +> If you want to start fresh, you can delete the data_mysqld volume. 
+ +```sh +make down +make docker_mysqld_volume_remove # Remove the mysql database volume +make up +``` + +If you want to completely nuke your environment and start over as if you had just cloned the repo, +you can run `make clean_docker`. This will `make down` and remove all docker resources taking space on the host machine. + ### Accessing the Development App - Add the following entry to your `/etc/hosts` file to access **addons-server** via a local domain: @@ -219,11 +214,11 @@ Another way to find out what's wrong is to run `docker compose logs`. ### Getting "Programming error [table] doesn't exist"? -Make sure you've run the `make initialize_docker` step as {ref}`detailed ` in the initial setup instructions. +Make sure you've run `make up`. ### ConnectionError during initialize (elasticsearch container fails to start) -When running `make initialize_docker` without a working elasticsearch container, you'll get a ConnectionError. Check the logs with `docker compose logs`. If elasticsearch is complaining about `vm.max_map_count`, run this command on your computer or your docker-machine VM: +When running `make up` without a working elasticsearch container, you'll get a ConnectionError. Check the logs with `docker compose logs`. If elasticsearch is complaining about `vm.max_map_count`, run this command on your computer or your docker-machine VM: ```sh sudo sysctl -w vm.max_map_count=262144 @@ -233,7 +228,7 @@ This allows processes to allocate more [memory map areas](https://stackoverflow. ### Connection to elasticsearch timed out (elasticsearch container exits with code 137) -`docker compose up -d` brings up all containers, but running `make initialize_docker` causes the elasticsearch container to go down. Running `docker compose ps` shows _Exited (137)_ against it. +`docker compose up -d` brings up all containers, but running `make up` causes the elasticsearch container to go down. Running `docker compose ps` shows _Exited (137)_ against it. 
Update default settings in Docker Desktop - we suggest increasing RAM limit to at least 4 GB in the Resources/Advanced section and click on "Apply and Restart". diff --git a/src/olympia/amo/management/commands/create_db.py b/src/olympia/amo/management/commands/create_db.py deleted file mode 100644 index 16308e34c8f..00000000000 --- a/src/olympia/amo/management/commands/create_db.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging - -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError - -import MySQLdb as mysql - - -class Command(BaseCommand): - """Based on django_extension's reset_db command but simplifed and with - support for all character sets defined in settings.""" - - help = 'Creates the database for this project.' - - def add_arguments(self, parser): - super().add_arguments(parser) - parser.add_argument( - '--force', action='store_true', help='Drops any existing database first.' - ) - - def handle(self, *args, **options): - """ - Create the database. - """ - db_info = settings.DATABASES.get('default') - - engine = db_info.get('ENGINE').split('.')[-1] - if engine != 'mysql': - raise CommandError('create_db only supports mysql databases') - - database_name = db_info.get('NAME') - kwargs = { - 'user': db_info.get('USER'), - 'passwd': db_info.get('PASSWORD'), - 'host': db_info.get('HOST'), - } - if db_info.get('PORT'): - kwargs['port'] = int(db_info.get('PORT')) - connection = mysql.connect(**kwargs) - - if options.get('force'): - drop_query = 'DROP DATABASE IF EXISTS `%s`' % database_name - else: - drop_query = None - - character_set = db_info.get('OPTIONS').get('charset', 'utf8mb4') - create_query = 'CREATE DATABASE `{}` CHARACTER SET {}'.format( - database_name, - character_set, - ) - if drop_query: - logging.info('Executing... "' + drop_query + '"') - connection.query(drop_query) - logging.info('Executing... 
"' + create_query + '"') - connection.query(create_query) - - logging.info('Reset successful.') diff --git a/src/olympia/amo/management/commands/initialize_data.py b/src/olympia/amo/management/commands/initialize_data.py new file mode 100644 index 00000000000..2fc58906677 --- /dev/null +++ b/src/olympia/amo/management/commands/initialize_data.py @@ -0,0 +1,201 @@ +import logging +import os +import random +import time +from functools import wraps + +from django.conf import settings +from django.core.management import call_command +from django.core.management.base import BaseCommand, CommandError + +import MySQLdb as mysql + +from olympia.search.utils import get_es + + +def retry_with_backoff(retries=3, backoff_in_seconds=1): + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + x = 0 + while True: + try: + return func(*args, **kwargs) + except Exception as e: + if x == retries: + raise + else: + sleep = backoff_in_seconds * 2**x + random.uniform(0, 1) + logging.warning( + f'{wrapper.__name__} failed. ' + f'Retrying in {sleep:.2f} seconds... Error: {str(e)}' + ) + time.sleep(sleep) + x += 1 + + return wrapper + + return decorator + + +class Command(BaseCommand): + """ + Creates the database for this project. + + This command is idempotent and will not re-create the database if it already exists. + It will also not re-seed the database or reindex the data + if the database already exists. + """ + + help = 'Creates, seeds, and indexes the database for this project.' 
+ connection = None + db_info = None + db_exists = False + num_addons = 10 + num_themes = num_addons + + def __init__(self, *args, **options): + super().__init__(*args, **options) + + self.db_info = settings.DATABASES.get('default') + self.connection = self.connect_to_db() + self.db_exists = self.check_db_exists() + + def add_arguments(self, parser): + super().add_arguments(parser) + parser.add_argument( + '--force-db', action='store_true', help='Force creating the database' + ) + parser.add_argument( + '--skip-seed', + action='store_true', + help='Skip seeding the database with addons', + ) + parser.add_argument( + '--skip-index', action='store_true', help='Skip indexing the database' + ) + + @retry_with_backoff(retries=3, backoff_in_seconds=1) + def connect_to_db(self): + engine = self.db_info.get('ENGINE').split('.')[-1] + if engine != 'mysql': + raise CommandError('create_db only supports mysql databases') + + kwargs = { + 'user': self.db_info.get('USER'), + 'passwd': self.db_info.get('PASSWORD'), + 'host': self.db_info.get('HOST'), + } + if self.db_info.get('PORT'): + kwargs['port'] = int(self.db_info.get('PORT')) + + logging.info('connecting to db') + return mysql.connect(**kwargs) + + @retry_with_backoff(retries=3, backoff_in_seconds=1) + def check_db_exists(self): + try: + self.connection.select_db(self.db_info.get('NAME')) + return True + except mysql.Error as exc: + logging.info(exc) + return False + + def create_db(self): + logging.info('Cleaning up directories linked to database records...') + root = os.path.join('/', 'data', 'olympia') + clean_dirs = ( + os.path.join(root, 'user-media'), + os.path.join(root, 'tmp'), + ) + + for dir in clean_dirs: + if os.path.exists(dir): + logging.info(f'Cleaning up {dir}...') + os.rmdir(dir) + + database_name = self.db_info.get('NAME') + character_set = self.db_info.get('OPTIONS').get('charset', 'utf8mb4') + + if self.db_exists: + drop_query = f'DROP DATABASE `{database_name}`' + logging.info('Executing... 
"' + drop_query + '"') + self.connection.query(drop_query) + + create_query = ( + f'CREATE DATABASE `{database_name}` CHARACTER SET {character_set}' + ) + logging.info('Executing... "' + create_query + '"') + self.connection.query(create_query) + + def seed_db(self): + logging.info('Creating seed data...') + # reindex --wipe will force the ES mapping to be re-installed. Useful to + # make sure the mapping is correct before adding a bunch of add-ons. + call_command('reindex', '--wipe', '--force', '--noinput') + call_command('generate_addons', '--app', 'firefox', self.num_addons) + call_command('generate_addons', '--app', 'android', self.num_addons) + call_command('generate_themes', self.num_themes) + # These add-ons are specifically useful for the addons-frontend + # homepage. You may have to re-run this, in case the data there + # changes. + call_command('generate_default_addons_for_frontend') + + def load_initial_data(self): + logging.info('Loading initial data...') + call_command('loaddata', 'initial.json') + call_command('import_prod_versions') + call_command( + 'createsuperuser', + '--no-input', + '--username', + 'local_admin', + '--email', + 'local_admin@mozilla.com', + ) + call_command('loaddata', 'zadmin/users') + + def handle(self, *args, **options): + """ + Create the database. + """ + force_db = options.get('force_db') + skip_seed = options.get('skip_seed') + skip_index = options.get('skip_index') + + logging.info(f'options: {options}') + + # Initialize ES inside the handle method + ES = get_es() + + # Step 1: Ensure the database exists + # is migrated and contains initial data if creating. + create_new_db = force_db or not self.db_exists + + # only create the db if we want to or need to + self.create_db() if create_new_db else logging.info('Database already exists.') + + # Migrate database even if not creating anew. 
+ logging.info('Migrating...') + call_command('migrate', '--noinput') + + # Load initial data after migrations + self.load_initial_data() if create_new_db else logging.info( + 'Skipping load initial data.' + ) + + # Step 2: Seed the db if it is a fresh database or we have opted in to seeding. + seed_db = create_new_db and not skip_seed + + self.seed_db() if seed_db else logging.info('Skipping seeding the database.') + + # Step 3: Index the db unless we opt out of indexing. + alias = settings.ES_INDEXES.get('default', None) + index_exists = ES.indices.exists(index=alias) + + will_index_db = (seed_db or not index_exists) and not skip_index + + if will_index_db: + call_command('reindex', '--noinput', '--force') + else: + logging.info('Skipping indexing the database.') diff --git a/src/olympia/amo/tests/test_commands.py b/src/olympia/amo/tests/test_commands.py index 27a620b4131..ed7a21792d0 100644 --- a/src/olympia/amo/tests/test_commands.py +++ b/src/olympia/amo/tests/test_commands.py @@ -10,6 +10,7 @@ from django.test.utils import override_settings import pytest +from MySQLdb import Error as MySQLError from olympia.addons.models import Preview from olympia.amo.management.commands.get_changed_files import ( @@ -332,3 +333,228 @@ def path(self): assert collect_blocklist(self.yesterday) == [ f'foo/{datetime_to_ts(newerer)}' ] + + +def scenario( + db_exists=False, + index_exists=False, + force_db=False, + skip_seed=False, + skip_index=False, + expected_queries=None, + expected_commands=None, +): + """ + Return a tuple of arguments for the test_scenarios function. + Includes defaults for the baseline scenario of a + totally fresh DB and index with no arguments. 
+ """ + return ( + db_exists, + index_exists, + force_db, + skip_seed, + skip_index, + expected_queries if expected_queries is not None else ['CREATE_DB'], + expected_commands + if expected_commands is not None + else ['MIGRATE', 'INITIAL_DATA', 'SEED_DATA', 'REINDEX'], + ) + + +@override_settings(DEBUG=True) +@pytest.mark.parametrize( + 'db_exists,index_exists,force_db,skip_seed,skip_index,expected_queries,expected_commands', + [ + scenario(), + # Skip seeding will remove 'SEED_DATA' from the expected commands. + # Even if the DB doesn't exist + scenario( + db_exists=False, + skip_seed=True, + expected_commands=['MIGRATE', 'INITIAL_DATA', 'REINDEX'], + ), + # Skip indexing will remove 'REINDEX' from the expected commands. + # Even if the index doesn't exist + scenario( + index_exists=False, + skip_index=True, + expected_queries=['CREATE_DB'], + expected_commands=['MIGRATE', 'INITIAL_DATA', 'SEED_DATA'], + ), + # If the index exists, but the db does not, and we don't skip seeding + # We reindex because there is new data in the db that needs to be indexed + scenario( + db_exists=False, + index_exists=True, + skip_seed=False, + skip_index=False, + expected_queries=['CREATE_DB'], + expected_commands=['MIGRATE', 'INITIAL_DATA', 'SEED_DATA', 'REINDEX'], + ), + # Similar to above, but instead we skip seeding with a non existant index + # seed data is removed but reindexing is needed to reacreate it + scenario( + db_exists=False, + index_exists=False, + skip_seed=True, + skip_index=False, + expected_queries=['CREATE_DB'], + expected_commands=['MIGRATE', 'INITIAL_DATA', 'REINDEX'], + ), + # Even if the db exists, if we force db and don't skip seeding + # we drop existing db and reseed the new one + scenario( + db_exists=True, + force_db=True, + skip_seed=False, + expected_queries=['DROP_DB', 'CREATE_DB'], + expected_commands=['MIGRATE', 'INITIAL_DATA', 'SEED_DATA', 'REINDEX'], + ), + # Same as above but we can still skip reindexing by skipping it + scenario( + 
db_exists=True, + force_db=True, + skip_seed=False, + skip_index=True, + expected_queries=['DROP_DB', 'CREATE_DB'], + expected_commands=['MIGRATE', 'INITIAL_DATA', 'SEED_DATA'], + ), + # we don't load initial data if we are not creating a db + # However we do reindex if not skipping it + scenario( + db_exists=True, + force_db=False, + expected_queries=[], + expected_commands=['MIGRATE', 'REINDEX'], + ), + # Similar as above but we skip reindexing + scenario( + db_exists=True, + force_db=False, + skip_index=True, + expected_queries=[], + expected_commands=['MIGRATE'], + ), + ], +) +@mock.patch('olympia.amo.management.commands.initialize_data.call_command') +@mock.patch('olympia.amo.management.commands.initialize_data.get_es') +@mock.patch('olympia.amo.management.commands.initialize_data.mysql.connect') +def test_scenarios( + mock_mysql_connect, + mock_get_es, + mock_call_command, + db_exists, + index_exists, + force_db, + skip_seed, + skip_index, + expected_queries, + expected_commands, +): + """ + Test the initialize_data command with different scenarios. A scenario defineds: + - the background state of the application, specifically: + - 1) if the `olympia` database exists according to mysql + - 2) if the `addons` index exists according to elasticsearch + - the arguments passed to the `initialize_data` command + + We can then define what the test expects to happen given these conditions with: + - the expected ORM operations + - the expected calls other management commands from the initialize_data command + + Scenarios assume the default background state of a + totally fresh DB and index with no arguments. + + Thus each set of arguments is defining a specific deviation from the default state + as well as the exact set of ORM operations and calls to other management commands + expected from the initialize_data command. Both are asserted in order. 
+ + This test is not exactly simple, but it allows for covering a large set of logic + to be tested in a systematic way with a large number of configurations. + """ + + # Mock the MySQL connection + mock_connection = mock.MagicMock() + mock_mysql_connect.return_value = mock_connection + + # Mock Elasticsearch client + mock_es = mock.MagicMock() + mock_get_es.return_value = mock_es + + # Database and index names + database_name = settings.DATABASES['default']['NAME'] + + _queries = { + 'CREATE_DB': f'CREATE DATABASE `{database_name}` CHARACTER SET utf8mb4', + 'DROP_DB': f'DROP DATABASE `{database_name}`', + } + _commands = { + 'MIGRATE': [('migrate', '--noinput')], + 'INITIAL_DATA': [ + ('loaddata', 'initial.json'), + ('import_prod_versions',), + ( + 'createsuperuser', + '--no-input', + '--username', + 'local_admin', + '--email', + 'local_admin@mozilla.com', + ), + ('loaddata', 'zadmin/users'), + ], + 'SEED_DATA': [ + ('reindex', '--wipe', '--force', '--noinput'), + ('generate_addons', '--app', 'firefox', 10), + ('generate_addons', '--app', 'android', 10), + ('generate_themes', 10), + ('generate_default_addons_for_frontend',), + ], + 'REINDEX': [('reindex', '--noinput', '--force')], + } + + if db_exists: + # Simulate that the database exists + mock_connection.select_db.return_value = None + mock_connection.select_db.side_effect = None + else: + # Simulate that the database does not exist by raising an exception + mock_connection.select_db.side_effect = MySQLError('Database does not exist.') + + mock_es.indices.exists.return_value = index_exists + + def _assert_db_queries_executed(query_keys): + queries = [_queries[key] for key in query_keys] + executed_queries = [ + call_args.args[0] for call_args in mock_connection.query.call_args_list + ] + + assert executed_queries == queries, ( + f'Expected queries were not executed in the correct order. 
' + f'Expected: {queries}, Actual: {executed_queries}' + ) + + def _assert_commands_called_in_order(command_keys): + expected_commands = [cmd for key in command_keys for cmd in _commands[key]] + actual_commands = [ + call_args.args for call_args in mock_call_command.call_args_list + ] + assert actual_commands == expected_commands, ( + f'Commands were not called in the expected order. ' + f'Expected: {expected_commands}, Actual: {actual_commands}' + ) + + call_command( + 'initialize_data', + force_db=force_db, + skip_seed=skip_seed, + skip_index=skip_index, + ) + + # Verify DB queries are executed in the correct order + _assert_db_queries_executed(expected_queries) + + # Verify commands are called in the correct order + _assert_commands_called_in_order(expected_commands)