diff --git a/.github/workflows/deploy-apis-to-production.yml b/.github/workflows/deploy-apis-to-production.yml index 0839628cae..a56b1702ed 100644 --- a/.github/workflows/deploy-apis-to-production.yml +++ b/.github/workflows/deploy-apis-to-production.yml @@ -49,6 +49,7 @@ jobs: run_meta_data: ${{ steps.check_files.outputs.run_meta_data }} # meta data run_view: ${{ steps.check_files.outputs.run_view }} # view run_calibrate: ${{ steps.check_files.outputs.run_calibrate }} # calibrate + run_insights: ${{ steps.check_files.outputs.run_insights}} # analytics run_kafka_cluster_operator: ${{ steps.check_files.outputs.run_kafka_cluster_operator }} # kafka cluster operator run_kafka_cluster: ${{ steps.check_files.outputs.run_kafka_cluster }} # kafka cluster run_kafka_topics: ${{ steps.check_files.outputs.run_kafka_topics }} # kafka topics @@ -95,6 +96,7 @@ jobs: echo "run_kafka_topics=false" >>$GITHUB_OUTPUT echo "run_workflows=false" >>$GITHUB_OUTPUT echo "run_incentives=false" >>$GITHUB_OUTPUT + echo "run_insights=false" >>$GITHUB_OUTPUT echo "run_spatial=false" >>$GITHUB_OUTPUT echo "run_kafka_connectors=false" >>$GITHUB_OUTPUT echo "run_nginx=false" >>$GITHUB_OUTPUT @@ -254,7 +256,13 @@ jobs: if [[ $file == k8s/incentives/* ]]; then echo "run_incentives=true" >>$GITHUB_OUTPUT fi + if [[ $file == src/insights/* ]]; then + echo "run_insights=true" >>$GITHUB_OUTPUT + fi + if [[ $file == k8s/insights/* ]]; then + echo "run_insights=true" >>$GITHUB_OUTPUT + fi if [[ $file == src/spatial/* ]]; then echo "run_spatial=true" >>$GITHUB_OUTPUT fi @@ -283,6 +291,7 @@ jobs: echo "run_auth_service=true" >>$GITHUB_OUTPUT echo "run_workflows=true" >>$GITHUB_OUTPUT echo "run_incentives=true" >>$GITHUB_OUTPUT + echo "run_insights=true" >>$GITHUB_OUTPUT echo "run_spatial=true" >>$GITHUB_OUTPUT echo "run_view=true" >>$GITHUB_OUTPUT echo "run_kafka_connectors=true" >>$GITHUB_OUTPUT @@ -841,6 +850,74 @@ jobs: kubectl create configmap --dry-run=client -o yaml --from-env-file=reports.env env-analytics-report-production | kubectl replace -f - -n production kubectl create configmap --dry-run=client -o yaml --from-file=google_application_credentials.json prod-analytics-config-files | kubectl replace -f - -n production + insights: + name: build-push-deploy-insights + needs: [check, image-tag] + if: needs.check.outputs.run_insights == 'true' + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3.5.3 + + ### run unit tests ### + + - name: Login to GCR + uses: docker/login-action@v2.2.0 + with: + registry: ${{ env.REGISTRY_URL }} + username: _json_key + password: ${{ secrets.GCR_CONFIG }} + + - name: Login to K8S + uses: azure/k8s-set-context@v3.0 + with: + method: kubeconfig + kubeconfig: ${{ secrets.K8S_CONFIG_PROD }} + + - name: Build and push API Docker Image + uses: docker/build-push-action@v4.1.1 + with: + push: true + context: src/insights/ + target: production + tags: ${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-insights-api:${{ needs.image-tag.outputs.build_id }},${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-insights-api:latest + + - name: Build and push Celery worker Image + uses: docker/build-push-action@v4.1.1 + with: + push: true + context: src/insights/ + target: celery + tags: ${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-insights-celery:${{ needs.image-tag.outputs.build_id }},${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-insights-celery:latest + + - name: Update corresponding helm values file(with retry) + uses: Wandalen/wretry.action@v1.0.36 # Retries action 
on fail + with: + action: fjogeleit/yaml-update-action@main # Action to retry + with: | + valueFile: "k8s/insights/values-prod.yaml" + propertyPath: "images.tag" + value: ${{ needs.image-tag.outputs.build_id }} + branch: ${{ env.DEPLOY_BRANCH }} + token: ${{ secrets.YAML_UPDATER_TOKEN }} + message: "Update insights production image tag to ${{ needs.image-tag.outputs.build_id }}" + + - name: Login to GCP + uses: google-github-actions/auth@v1.1.1 + with: + credentials_json: ${{ secrets.GCP_SA_CREDENTIALS }} + + - name: Setup Cloud SDK + uses: google-github-actions/setup-gcloud@v1.1.1 + + - name: Update the corresponding k8s configmap(s) + run: | + cd src/insights/ + gcloud secrets versions access latest --secret="prod-env-analytics" > .env + gcloud secrets versions access latest --secret="prod-key-analytics-service-account" > google_application_credentials.json + kubectl create configmap --dry-run=client -o yaml --from-env-file=.env env-insights-production | kubectl replace -f - -n production + kubectl create configmap --dry-run=client -o yaml --from-file=google_application_credentials.json prod-insights-config-files | kubectl replace -f - -n production + ### device uptime ### device-uptime: name: build-push-deploy-device-uptime diff --git a/.github/workflows/deploy-apis-to-staging.yml b/.github/workflows/deploy-apis-to-staging.yml index 3e94db0322..42a38a2b11 100644 --- a/.github/workflows/deploy-apis-to-staging.yml +++ b/.github/workflows/deploy-apis-to-staging.yml @@ -54,6 +54,7 @@ jobs: run_kafka_topics: ${{ steps.check_files.outputs.run_kafka_topics }} # kafka topics run_workflows: ${{ steps.check_files.outputs.run_workflows }} # workflows run_incentives: ${{ steps.check_files.outputs.run_incentives }} # incentives + run_insights: ${{ steps.check_files.outputs.run_insights }} # incentives run_spatial: ${{ steps.check_files.outputs.run_spatial }} # spatial run_kafka_connectors: ${{ steps.check_files.outputs.run_kafka_connectors }} # kafka connectors run_nginx: ${{ steps.check_files.outputs.run_nginx }} # nginx ingress @@ -95,6 +96,7 @@ jobs: echo "run_kafka_topics=false" >>$GITHUB_OUTPUT echo "run_workflows=false" >>$GITHUB_OUTPUT echo "run_incentives=false" >>$GITHUB_OUTPUT + echo "run_insights=false" >>$GITHUB_OUTPUT echo "run_spatial=false" >>$GITHUB_OUTPUT echo "run_kafka_connectors=false" >>$GITHUB_OUTPUT echo "run_nginx=false" >>$GITHUB_OUTPUT @@ -255,6 +257,14 @@ jobs: echo "run_incentives=true" >>$GITHUB_OUTPUT fi + if [[ $file == src/insights/* ]]; then + echo "run_insights=true" >>$GITHUB_OUTPUT + fi + + if [[ $file == k8s/insights/* ]]; then + echo "run_insights=true" >>$GITHUB_OUTPUT + fi + if [[ $file == src/spatial/* ]]; then echo "run_spatial=true" >>$GITHUB_OUTPUT fi @@ -284,6 +294,7 @@ jobs: echo "run_auth_service=true" >>$GITHUB_OUTPUT echo "run_workflows=true" >>$GITHUB_OUTPUT echo "run_incentives=true" >>$GITHUB_OUTPUT + echo "run_insights=true" >>$GITHUB_OUTPUT echo "run_spatial=true" >>$GITHUB_OUTPUT echo "run_view=true" >>$GITHUB_OUTPUT echo "run_kafka_connectors=true" >>$GITHUB_OUTPUT @@ -990,6 +1001,73 @@ jobs: kubectl create configmap --dry-run=client -o yaml --from-env-file=reports.env env-analytics-report-staging | kubectl replace -f - -n staging kubectl create configmap --dry-run=client -o yaml --from-file=google_application_credentials.json stage-analytics-config-files | kubectl replace -f - -n staging + insights: + name: build-push-deploy-insights + needs: [check, image-tag] + if: needs.check.outputs.run_insights == 'true' + runs-on: ubuntu-latest + steps: 
+ - name: Checkout + uses: actions/checkout@v3.5.3 + + ### run unit tests ### + + - name: Login to GCR + uses: docker/login-action@v2.2.0 + with: + registry: ${{ env.REGISTRY_URL }} + username: _json_key + password: ${{ secrets.GCR_CONFIG }} + + - name: Login to K8S + uses: azure/k8s-set-context@v3.0 + with: + method: kubeconfig + kubeconfig: ${{ secrets.K8S_CONFIG_STAGE }} + + - name: Build and push API Docker Image + uses: docker/build-push-action@v4.1.1 + with: + push: true + context: src/insights/ + target: staging + tags: ${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-stage-insights-api:${{ needs.image-tag.outputs.build_id }},${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-stage-insights-api:latest + + - name: Build and push Celery Docker Image + uses: docker/build-push-action@v4.1.1 + with: + push: true + context: src/insights/ + target: celery + tags: ${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-stage-insights-celery:${{ needs.image-tag.outputs.build_id }},${{ env.REGISTRY_URL }}/${{ env.PROJECT_ID }}/airqo-stage-insights-celery:latest + + - name: Update corresponding helm values file(with retry) + uses: Wandalen/wretry.action@v1.0.36 # Retries action on fail + with: + action: fjogeleit/yaml-update-action@main # Action to retry + with: | + valueFile: "k8s/insights/values-stage.yaml" + propertyPath: "images.tag" + value: ${{ needs.image-tag.outputs.build_id }} + branch: ${{ env.DEPLOY_BRANCH }} + token: ${{ secrets.YAML_UPDATER_TOKEN }} + message: "Update insights staging images tag to ${{ needs.image-tag.outputs.build_id }}" + + - name: Login to GCP + uses: google-github-actions/auth@v1.1.1 + with: + credentials_json: ${{ secrets.GCP_SA_CREDENTIALS }} + + - name: Setup Cloud SDK + uses: google-github-actions/setup-gcloud@v1.1.1 + + - name: Update the corresponding k8s configmap(s) + run: | + cd src/insights/ + gcloud secrets versions access latest --secret="sta-env-analytics" > .env + gcloud secrets versions access latest --secret="sta-key-analytics-service-account" > google_application_credentials.json + kubectl create configmap --dry-run=client -o yaml --from-env-file=.env env-insights-staging | kubectl replace -f - -n staging + kubectl create configmap --dry-run=client -o yaml --from-file=google_application_credentials.json stage-insights-config-files | kubectl replace -f - -n staging ### device uptime ### device-uptime: name: build-push-deploy-device-uptime diff --git a/k8s/analytics/values-prod.yaml b/k8s/analytics/values-prod.yaml index 020e26b218..d575dc6c30 100644 --- a/k8s/analytics/values-prod.yaml +++ b/k8s/analytics/values-prod.yaml @@ -8,7 +8,7 @@ images: celeryWorker: eu.gcr.io/airqo-250220/airqo-analytics-celery-worker reportJob: eu.gcr.io/airqo-250220/airqo-analytics-report-job devicesSummaryJob: eu.gcr.io/airqo-250220/airqo-analytics-devices-summary-job - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 api: name: airqo-analytics-api label: analytics-api diff --git a/k8s/analytics/values-stage.yaml b/k8s/analytics/values-stage.yaml index 44ba487848..63bcee1bf1 100644 --- a/k8s/analytics/values-stage.yaml +++ b/k8s/analytics/values-stage.yaml @@ -8,7 +8,7 @@ images: celeryWorker: eu.gcr.io/airqo-250220/airqo-stage-analytics-celery-worker reportJob: eu.gcr.io/airqo-250220/airqo-stage-analytics-report-job devicesSummaryJob: eu.gcr.io/airqo-250220/airqo-stage-analytics-devices-summary-job - tag: stage-5c6990cd-1719227902 + tag: stage-3c86d834-1725869877 api: name: airqo-stage-analytics-api label: sta-alytics-api diff --git 
a/k8s/auth-service/values-prod.yaml b/k8s/auth-service/values-prod.yaml index c3bd1c8bab..8118256eb3 100644 --- a/k8s/auth-service/values-prod.yaml +++ b/k8s/auth-service/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-auth-api - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' podAnnotations: {} diff --git a/k8s/device-registry/values-prod.yaml b/k8s/device-registry/values-prod.yaml index 6b83ad6c8c..007ce1e3cc 100644 --- a/k8s/device-registry/values-prod.yaml +++ b/k8s/device-registry/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-device-registry-api - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' podAnnotations: {} diff --git a/k8s/device-uptime/values-prod.yaml b/k8s/device-uptime/values-prod.yaml index 7e1ab5c322..76e51159a0 100644 --- a/k8s/device-uptime/values-prod.yaml +++ b/k8s/device-uptime/values-prod.yaml @@ -4,7 +4,7 @@ app: configmap: env-device-uptime-production image: repository: eu.gcr.io/airqo-250220/airqo-device-uptime-job - tag: prod-d8be0487-1725363294 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' resources: diff --git a/k8s/device-uptime/values-stage.yaml b/k8s/device-uptime/values-stage.yaml index 7f3ddb6e67..e0b1a4a979 100644 --- a/k8s/device-uptime/values-stage.yaml +++ b/k8s/device-uptime/values-stage.yaml @@ -4,7 +4,7 @@ app: configmap: env-device-uptime-staging image: repository: eu.gcr.io/airqo-250220/airqo-stage-device-uptime-job - tag: stage-96d94ce4-1715349509 + tag: stage-3c86d834-1725869877 nameOverride: '' fullnameOverride: '' resources: diff --git a/k8s/exceedance/values-prod-airqo.yaml b/k8s/exceedance/values-prod-airqo.yaml index 08625a1ec0..f42fd69057 100644 --- a/k8s/exceedance/values-prod-airqo.yaml +++ b/k8s/exceedance/values-prod-airqo.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/airqo-exceedance-job - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' diff --git a/k8s/exceedance/values-prod-kcca.yaml b/k8s/exceedance/values-prod-kcca.yaml index a8c3ae16fd..d101c0bdb8 100644 --- a/k8s/exceedance/values-prod-kcca.yaml +++ b/k8s/exceedance/values-prod-kcca.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/kcca-exceedance-job - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' diff --git a/k8s/insights/.helmignore b/k8s/insights/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/k8s/insights/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/k8s/insights/Chart.yaml b/k8s/insights/Chart.yaml new file mode 100644 index 0000000000..c2672c0c5b --- /dev/null +++ b/k8s/insights/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +appVersion: "2.0.0" +description: AirQo Insights Helm Chart +name: airqo-insights +home: https://airqo.net +version: 1.0.0 +maintainers: + - name: AirQo + email: support@airqo.net + url: https://airqo.net diff --git a/src/insights/api/__init__.py b/k8s/insights/charts/.gitkeep similarity index 100% rename from src/insights/api/__init__.py rename to k8s/insights/charts/.gitkeep diff --git a/k8s/insights/templates/NOTES.txt b/k8s/insights/templates/NOTES.txt new file mode 100644 index 0000000000..35183fbe65 --- /dev/null +++ b/k8s/insights/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "airqo-insights.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of it by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "airqo-insights.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "airqo-insights.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "airqo-insights.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/k8s/insights/templates/_helpers.tpl b/k8s/insights/templates/_helpers.tpl new file mode 100644 index 0000000000..347c24700d --- /dev/null +++ b/k8s/insights/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "airqo-insights.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "airqo-insights.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "airqo-insights.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "airqo-insights.labels" -}} +helm.sh/chart: {{ include "airqo-insights.chart" . }} +{{ include "airqo-insights.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "airqo-insights.selectorLabels" -}} +app.kubernetes.io/name: {{ include "airqo-insights.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "airqo-insights.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "airqo-insights.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/k8s/insights/templates/api.yaml b/k8s/insights/templates/api.yaml new file mode 100644 index 0000000000..ab4f10cdb2 --- /dev/null +++ b/k8s/insights/templates/api.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Values.api.name }} + namespace: {{ .Values.namespace }} + labels: + {{- include "airqo-insights.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + app: {{ .Values.api.label }} + replicas: {{ .Values.api.replicaCount }} + revisionHistoryLimit: 2 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + minReadySeconds: 5 + template: + metadata: + {{- with .Values.api.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ .Values.api.label }} + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + preference: + matchExpressions: + - key: node-type + operator: In + values: + - high-memory + - weight: 1 + preference: + matchExpressions: + - key: node-type + operator: In + values: + - general-purpose + containers: + - name: {{ .Values.api.label }} + image: "{{ .Values.images.repositories.api }}:{{ .Values.images.tag }}" + imagePullPolicy: IfNotPresent + ports: + - containerPort: {{ .Values.service.targetPort }} + name: {{ .Values.api.label }} + envFrom: + - configMapRef: + name: {{ .Values.api.configmap }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.api.resources | nindent 12 }} + - name: celery + image: "{{ .Values.images.repositories.celery }}:{{ .Values.images.tag }}" + imagePullPolicy: IfNotPresent + envFrom: + - configMapRef: + name: {{ .Values.api.configmap }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.api.resources | nindent 12 }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/k8s/insights/templates/hpa.yaml b/k8s/insights/templates/hpa.yaml new file mode 100644 index 0000000000..87895dc95a --- /dev/null +++ b/k8s/insights/templates/hpa.yaml @@ -0,0 +1,31 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: '{{ .Values.api.name }}-hpa' + namespace: {{ .Values.namespace }} + labels: + {{- include "airqo-insights.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Values.api.name }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} \ No newline at end of file diff --git a/k8s/insights/templates/service.yaml b/k8s/insights/templates/service.yaml new file mode 100644 index 0000000000..32307d8c79 --- /dev/null +++ b/k8s/insights/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: '{{ .Values.api.name }}-svc' + labels: {{- include "airqo-insights.labels" . | nindent 4 }} + namespace: {{ .Values.namespace }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + protocol: {{ .Values.service.protocol }} + nodePort: {{ .Values.service.nodePort }} + selector: + app: {{ .Values.api.label }} diff --git a/k8s/insights/templates/tests/test-connection.yaml b/k8s/insights/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..9152edd19f --- /dev/null +++ b/k8s/insights/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "airqo-insights.fullname" . }}-test-connection" + labels: +{{ include "airqo-insights.labels" . | indent 4 }} + annotations: + "helm.sh/hook": test-success +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "airqo-insights.fullname" . 
}}:{{ .Values.service.port }}'] + restartPolicy: Never \ No newline at end of file diff --git a/k8s/insights/values-prod.yaml b/k8s/insights/values-prod.yaml new file mode 100644 index 0000000000..51142fe39d --- /dev/null +++ b/k8s/insights/values-prod.yaml @@ -0,0 +1,41 @@ +namespace: production +nameOverride: '' +fullnameOverride: '' +images: + repositories: + api: eu.gcr.io/airqo-250220/airqo-insights-api + celery: eu.gcr.io/airqo-250220/airqo-insights-celery + celeryWorker: eu.gcr.io/airqo-250220/airqo-insights-celery-worker + tag: prod-153deed2-1725870728 +api: + name: airqo-insights-api + label: insights-api + configmap: env-insights-production + replicaCount: 3 + podAnnotations: {} + resources: + limits: + cpu: 100m + memory: 600Mi + requests: + cpu: 10m + memory: 250Mi +volumeMounts: + - name: config-volume + mountPath: /etc/config +volumes: + - name: config-volume + configMap: + name: prod-insights-config-files +service: + type: NodePort + port: 5000 + protocol: TCP + targetPort: 5000 + nodePort: 30003 +ingress: + enabled: false +autoscaling: + minReplicas: 1 + maxReplicas: 3 + targetMemoryUtilizationPercentage: 70 diff --git a/k8s/insights/values-stage.yaml b/k8s/insights/values-stage.yaml new file mode 100644 index 0000000000..fb579cbfad --- /dev/null +++ b/k8s/insights/values-stage.yaml @@ -0,0 +1,40 @@ +namespace: staging +nameOverride: '' +fullnameOverride: '' +images: + repositories: + api: eu.gcr.io/airqo-250220/airqo-stage-insights-api + celery: eu.gcr.io/airqo-250220/airqo-stage-insights-celery + tag: stage-d312356b-1725870215 +api: + name: airqo-stage-insights-api + label: sta-alytics-api + configmap: env-insights-staging + replicaCount: 2 + podAnnotations: {} + resources: + limits: + cpu: 100m + memory: 600Mi + requests: + cpu: 10m + memory: 250Mi +volumeMounts: + - name: config-volume + mountPath: /etc/config +volumes: + - name: config-volume + configMap: + name: stage-insights-config-files +service: + type: NodePort + port: 5000 + protocol: TCP + targetPort: 5000 + nodePort: 31003 +ingress: + enabled: false +autoscaling: + minReplicas: 1 + maxReplicas: 2 + targetMemoryUtilizationPercentage: 80 diff --git a/k8s/meta-data/values-prod.yaml b/k8s/meta-data/values-prod.yaml index 88c9086b49..7609796d1a 100644 --- a/k8s/meta-data/values-prod.yaml +++ b/k8s/meta-data/values-prod.yaml @@ -8,7 +8,7 @@ images: repositories: api: eu.gcr.io/airqo-250220/airqo-meta-data-api sitesConsumer: eu.gcr.io/airqo-250220/airqo-meta-data-sites-consumer - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' podAnnotations: {} diff --git a/k8s/predict/values-prod.yaml b/k8s/predict/values-prod.yaml index 6497f24bfe..ece55e5ef7 100644 --- a/k8s/predict/values-prod.yaml +++ b/k8s/predict/values-prod.yaml @@ -7,7 +7,7 @@ images: predictJob: eu.gcr.io/airqo-250220/airqo-predict-job trainJob: eu.gcr.io/airqo-250220/airqo-train-job predictPlaces: eu.gcr.io/airqo-250220/airqo-predict-places-air-quality - tag: prod-513a6ce9-1720609414 + tag: prod-153deed2-1725870728 api: name: airqo-prediction-api label: prediction-api diff --git a/k8s/predict/values-stage.yaml b/k8s/predict/values-stage.yaml index 9a076e6ff5..eeae159a50 100644 --- a/k8s/predict/values-stage.yaml +++ b/k8s/predict/values-stage.yaml @@ -7,7 +7,7 @@ images: predictJob: eu.gcr.io/airqo-250220/stage-airqo-predict-job trainJob: eu.gcr.io/airqo-250220/stage-airqo-train-job predictPlaces: eu.gcr.io/airqo-250220/stage-airqo-predict-places-air-quality - tag: stage-360b17b3-1720372758 + tag: 
stage-3c86d834-1725869877 api: name: airqo-stage-prediction-api label: prediction-api diff --git a/k8s/spatial/values-prod.yaml b/k8s/spatial/values-prod.yaml index c1c939f01e..2c7df3c8fa 100644 --- a/k8s/spatial/values-prod.yaml +++ b/k8s/spatial/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-spatial-api - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: '' fullnameOverride: '' podAnnotations: {} diff --git a/k8s/workflows/values-prod.yaml b/k8s/workflows/values-prod.yaml index 2db280101d..7bc5ca899d 100644 --- a/k8s/workflows/values-prod.yaml +++ b/k8s/workflows/values-prod.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-redis containers: eu.gcr.io/airqo-250220/airqo-workflows - tag: prod-fb910b65-1725810667 + tag: prod-153deed2-1725870728 nameOverride: "" fullnameOverride: "" podAnnotations: {} diff --git a/src/analytics/README.md b/src/analytics/README.md index 8a3ee2a992..663e9c755b 100644 --- a/src/analytics/README.md +++ b/src/analytics/README.md @@ -1,10 +1,10 @@ -clone project from GitHub. +Clone project from GitHub. - `git clone https://github.com/airqo-platform/AirQo-api.git` -change directory to the analytics microservice +Change directory to the analytics microservice - `cd src/analytics` -create local python environment +Create local python environment windows diff --git a/src/device-uptime/README.md b/src/device-uptime/README.md index dca552c3ec..7cd0dbfb6e 100644 --- a/src/device-uptime/README.md +++ b/src/device-uptime/README.md @@ -22,7 +22,7 @@ - `pip install -r requirements.txt` -- Create a `.env` file with the following keys +- Create a `.env` file with the following keys. - **ENV** e.g `staging` - **REGISTRY_MONGO_GCE_URI** diff --git a/src/insights/Dockerfile b/src/insights/Dockerfile index aeedcfe74b..affddff765 100644 --- a/src/insights/Dockerfile +++ b/src/insights/Dockerfile @@ -1,37 +1,28 @@ -# syntax=docker/dockerfile:1 - -FROM python:3.10-slim-bullseye as base +FROM python:3.10.14-slim-bullseye as base WORKDIR /app/ COPY requirements.txt requirements.txt -RUN pip install --upgrade pip setuptools wheel -RUN pip3 install -r requirements.txt +RUN pip install --upgrade pip setuptools wheel --no-cache-dir && \ + pip install -r requirements.txt --no-cache-dir COPY . 
/app/ -ENV FLASK_APP=manage.py EXPOSE 5000 FROM base as dev ENV FLASK_ENV=development -CMD ["flask", "run", "--host=0.0.0.0"] +CMD ["flask", "run", "--host=0.0.0.0"] FROM base as staging ENV FLASK_ENV=staging -CMD ["gunicorn", "--bind=0.0.0.0:5000", "manage:app"] +CMD ["gunicorn", "--bind=0.0.0.0:5000", "app:flask_app"] FROM base as production ENV FLASK_ENV=production -CMD ["gunicorn", "--bind=0.0.0.0:5000", "manage:app"] - -FROM base as celery-beat -CMD celery -A celery_app.celery beat - -FROM base as celery-worker -CMD celery -A celery_app.celery worker -Q analytics --loglevel=info +CMD ["gunicorn", "--bind=0.0.0.0:5000", "app:flask_app"] -FROM base as devices-summary-job -CMD ["python", "devices_summary.py"] \ No newline at end of file +FROM base as celery +CMD ["celery", "-A", "app", "worker", "--loglevel=info"] diff --git a/src/insights/Jenkinsfile b/src/insights/Jenkinsfile deleted file mode 100644 index 5591187822..0000000000 --- a/src/insights/Jenkinsfile +++ /dev/null @@ -1,128 +0,0 @@ -pipeline { - agent any - environment { - IMAGE_NAME_PROD = 'airqo-insights-api' - IMAGE_NAME_STAGE = 'airqo-stage-insights-api' - } - stages { - stage("Start"){ - steps { - slackSend (color: '#1976d2', message: "BUILD STARTED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - stage("Checkout code") { - steps { - checkout scm - } - } - stage("Build for Staging"){ - when {branch "staging"} - steps { - - echo "Docker Build" - sh """ - docker build -t ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_STAGE}:"latest" -f "src/insights/Dockerfile.stage.txt" . - """ - withCredentials([file(credentialsId: "${GCR_CONFIG}", variable: 'GC_KEY')]){ - sh "cat '$GC_KEY' | docker login -u _json_key --password-stdin ${REGISTRY_URI}" - sh "gcloud auth activate-service-account --key-file='$GC_KEY'" - sh "gcloud auth configure-docker" - echo "Pushing image to GCR" - sh """ - gcloud container images add-tag ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_STAGE} ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_STAGE}:"v${env.BUILD_NUMBER}" - docker push ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_STAGE}:"latest" - """ - } - } - post{ - success{ - echo "Build and Push Successfully" - } - failure{ - echo "Build and Push Failed" - slackSend (color: '#FF0000', message: "FAILED TO BUILD AND PUSH IMAGE: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - - } - stage("Build for Production"){ - when {branch "master"} - steps { - - echo "Docker Build" - sh """ - docker build -t ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_PROD}:"latest" -f "src/insights/Dockerfile" . 
- """ - withCredentials([file(credentialsId: "${GCR_CONFIG}", variable: 'GC_KEY')]){ - sh "cat '$GC_KEY' | docker login -u _json_key --password-stdin ${REGISTRY_URI}" - sh "gcloud auth activate-service-account --key-file='$GC_KEY'" - sh "gcloud auth configure-docker" - echo "Pushing image to GCR" - sh """ - gcloud container images add-tag ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_PROD} ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_PROD}:"v${env.BUILD_NUMBER}" - docker push ${REGISTRY_URI}/${PROJECT_ID}/${IMAGE_NAME_PROD}:"latest" - """ - } - } - post{ - success{ - echo "Build and Push Successfully" - } - failure{ - echo "Build and Push Failed" - echo 'insights service deployed successfully' - slackSend (color: '#FF0000', message: "FAILED TO BUILD AND PUSH IMAGE: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - - } - stage("Deploy to Staging"){ - when {branch "staging"} - steps { - withKubeConfig([credentialsId: "${K8S_CONFIG}"]) { - sh """ - kubectl apply -f 'k8s/stage-insights-api.yaml' - kubectl rollout restart deployment/${IMAGE_NAME_STAGE} -n staging - """ - } - } - post{ - success{ - echo "Successfully deployed to staging" - } - failure{ - echo "Failed deploying to staging" - echo 'insights service deployed successfully' - slackSend (color: '#FF0000', message: "FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - } - stage("Deploy to Production"){ - when {branch "master"} - steps { - withKubeConfig([credentialsId: "${K8S_CONFIG}"]) { - sh """ - kubectl apply -f 'k8s/prod-insights-api.yaml' - kubectl rollout restart deployment/${IMAGE_NAME_PROD} -n production - """ - } - } - post{ - success{ - echo "Successfully deployed to production" - } - failure{ - echo "Failed deploying to production" - echo 'insights service deployed successfully' - slackSend (color: '#FF0000', message: "FAILED: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - } - stage('Final'){ - steps { - echo 'insights service deployed successfully' - slackSend (color: '#00FF00', message: "SUCCESSFUL: Job '${env.JOB_NAME} [${env.BUILD_NUMBER}]' (${env.BUILD_URL})") - } - } - } -} diff --git a/src/insights/README.md b/src/insights/README.md index 3b6f1e064f..128bd1f4b4 100644 --- a/src/insights/README.md +++ b/src/insights/README.md @@ -1,12 +1,25 @@ -clone project from GitHub. +# INSIGHTS MICROSERVICE + +This microservice contains the back-end source code for the AirQo insights & netmanager platforms. +The code is written following concepts in the Flask-restX framework [documentation](https://flask-restx.readthedocs.io/en/latest/index.html) + +## Steps to run +clone project from GitHub. 
- `git clone https://github.com/airqo-platform/AirQo-api.git` change directory to the insights microservice - - `cd src/insights` -create local python environment +### Using Docker +#### Prerequisites + +- Docker (>= 26.0.0) +- Docker compose (>= v2.26.1) +- `docker compose up` + +#### Running locally +- create local python environment windows @@ -18,29 +31,24 @@ linux/mac activate the environment windows - - `[local_env_name]\Scripts\activate` linux/mac - - source `[local_env_name]/bin/activate` Install dependencies using the requirements.txt - - `pip install -r requirements.txt` set environment variables windows - -- `set FLASK_APP=manage.py` +- `set FLASK_APP=app.py` - `set FLASK_ENV=development` linux/mac - -- `export FLASK_APP=manage.py` +- `export FLASK_APP=app.py` - `export FLASK_ENV=development` -Run the Flask App +Run the Flask App - `flask run` diff --git a/src/insights/api/docs/dashboard/divisions_get.yml b/src/insights/api/docs/dashboard/divisions_get.yml deleted file mode 100644 index 19d38431d1..0000000000 --- a/src/insights/api/docs/dashboard/divisions_get.yml +++ /dev/null @@ -1,43 +0,0 @@ -Example endpoint for fetching divisions ---- -tags: [Dashboard] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo -definitions: - DivisionResponse: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: division successfully fetched - data: - type: array - items: - $ref: '#/definitions/DivisionData' - DivisionData: - type: object - properties: - Division: - type: string - example: Kawempe - DeviceCode: - type: string - example: A743BPWK - LocationCode: - type: string - example: KCCA_KWPE_AQ05 - Parish: - type: string - example: Wandegeya -responses: - 200: - description: A successful retrieval of division data - schema: - $ref: '#/definitions/DivisionResponse' diff --git a/src/insights/api/docs/dashboard/monitoring_site_location_get.yml b/src/insights/api/docs/dashboard/monitoring_site_location_get.yml deleted file mode 100644 index ae7d2ed089..0000000000 --- a/src/insights/api/docs/dashboard/monitoring_site_location_get.yml +++ /dev/null @@ -1,64 +0,0 @@ -Example endpoint for fetching monitoring site location data ---- -tags: [Dashboard] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: orgName - description: organisation name incase it's different from the tenant value (compatibility reasons) - in: query - type: string - required: false -definitions: - MonitoringSiteLocationResponse: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: monitoring site location data successfully fetched - data: - type: array - items: - $ref: '#/definitions/MonitoringSiteLocationData' - MonitoringSiteLocationData: - type: object - properties: - DeviceCode: - type: string - example: A743BPWK - Parish: - type: string - example: Wandegeya - Division: - type: string - example: Kawempe - Last_Hour_PM25_Value: - type: float - example: 49 - Latitude: - type: float - example: 0.331732 - Longitude: - type: float - example: 32.57455 - LocationCode: - type: string - example: KCCA_KWPE_AQ05 - LastHour: - type: string - example: "2021-03-03 11:00" - _id: - type: string - example: 5e78a8e66a7e115ad4a4e6c6 - -responses: - 200: - description: A successful retrieval of monitoring site location data - schema: - $ref: '#/definitions/MonitoringSiteLocationResponse' diff --git 
a/src/insights/api/docs/dashboard/pm25_location_count_get.yml b/src/insights/api/docs/dashboard/pm25_location_count_get.yml deleted file mode 100644 index 4826247aa5..0000000000 --- a/src/insights/api/docs/dashboard/pm25_location_count_get.yml +++ /dev/null @@ -1,53 +0,0 @@ -Example endpoint returning pm2.5 category location count ---- -tags: [Dashboard] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo -definitions: - CategoryLocationCountResponse: - type: object - properties: - status: - type: string - message: - type: string - data: - type: array - items: - $ref: '#/definitions/LocationCategory' - example: - status: success - message: location count successfully fetched - data: - - locations_with_category_good: - - category_name: Good - category_count: 0 - category_locations: [] - - locations_with_category_UH4SG: - - category_name: UH4SG - category_count: 4 - category_locations: [Kawala, Ggaba, Nsambya Central, Mutundwe] - - LocationCategory: - type: object - properties: - location_category: - type: object - properties: - category_name: - type: string - category_count: - type: integer - category_locations: - type: array - items: - type: string -responses: - 200: - description: A list of pm2.5 category location count - schema: - $ref: '#/definitions/CategoryLocationCountResponse' diff --git a/src/insights/api/docs/report/default_report_template_get.yml b/src/insights/api/docs/report/default_report_template_get.yml deleted file mode 100644 index 58d61b0426..0000000000 --- a/src/insights/api/docs/report/default_report_template_get.yml +++ /dev/null @@ -1,39 +0,0 @@ -Example endpoint for retrieving default report template ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo -responses: - 200: - description: A successful retrieval of default report template - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: default report successfully fetched - data: - type: object - properties: - report: - type: object - properties: - _id: - type: string - user_id: - type: string - report_date: - type: datetime - report_type: - type: string - report_name: - type: string - report_body: - type: object \ No newline at end of file diff --git a/src/insights/api/docs/report/default_report_template_patch.yml b/src/insights/api/docs/report/default_report_template_patch.yml deleted file mode 100644 index cd2ca9e42b..0000000000 --- a/src/insights/api/docs/report/default_report_template_patch.yml +++ /dev/null @@ -1,33 +0,0 @@ -Example endpoint for updating default report template ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: body - in: body - type: object - properties: - userId: - type: string - example: - reportName: - type: string - example: - reportBody: - type: object -responses: - 202: - description: A successful update of report template - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: default reporting template updated successfully diff --git a/src/insights/api/docs/report/default_report_template_post.yml b/src/insights/api/docs/report/default_report_template_post.yml deleted file mode 100644 index e3e7c4b356..0000000000 --- a/src/insights/api/docs/report/default_report_template_post.yml +++ /dev/null @@ -1,33 +0,0 @@ -Example endpoint for creating default report template ---- -tags: [Report] 
-parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: body - in: body - type: object - properties: - userId: - type: string - example: - reportName: - type: string - example: - reportBody: - type: object -responses: - 201: - description: A successful creation of report template - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: Default Report Template Saved Successfully diff --git a/src/insights/api/docs/report/monthly_report_extra_delete.yml b/src/insights/api/docs/report/monthly_report_extra_delete.yml deleted file mode 100644 index 5d980e654c..0000000000 --- a/src/insights/api/docs/report/monthly_report_extra_delete.yml +++ /dev/null @@ -1,25 +0,0 @@ -Example endpoint for deleting monthly report ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: report_name - in: path - required: true - type: string -responses: - 200: - description: A successful deletion of monthly report - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: monthly report {report_name} deleted successfully diff --git a/src/insights/api/docs/report/monthly_report_extra_post.yml b/src/insights/api/docs/report/monthly_report_extra_post.yml deleted file mode 100644 index bbd479eb04..0000000000 --- a/src/insights/api/docs/report/monthly_report_extra_post.yml +++ /dev/null @@ -1,55 +0,0 @@ -Example endpoint for updating monthly report ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: report_name - in: path - required: true - type: string - - name: body - in: body - type: object - properties: - userId: - type: string - example: - reportName: - type: string - example: - reportBody: - type: object -responses: - 202: - description: A successful update of monthly report - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: report updated successfully - data: - type: object - properties: - report: - type: object - properties: - _id: - type: string - user_id: - type: string - report_date: - type: datetime - report_type: - type: string - report_name: - type: string - report_body: - type: object diff --git a/src/insights/api/docs/report/monthly_report_get.yml b/src/insights/api/docs/report/monthly_report_get.yml deleted file mode 100644 index c6e8c10d69..0000000000 --- a/src/insights/api/docs/report/monthly_report_get.yml +++ /dev/null @@ -1,43 +0,0 @@ -Example endpoint for getting monthly reports ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: userId - in: query - required: true - type: string -responses: - 200: - description: A successful retrieval of monthly reports - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: reports successfully fetched - data: - type: object - properties: - report: - type: object - properties: - _id: - type: string - user_id: - type: string - report_date: - type: datetime - report_type: - type: string - report_name: - type: string - report_body: - type: object diff --git a/src/insights/api/docs/report/monthly_report_post.yml b/src/insights/api/docs/report/monthly_report_post.yml deleted file mode 100644 index 57e704c780..0000000000 --- 
a/src/insights/api/docs/report/monthly_report_post.yml +++ /dev/null @@ -1,33 +0,0 @@ -Example endpoint for creating monthly report ---- -tags: [Report] -parameters: - - name: tenant - in: query - type: string - required: true - default: airqo - - name: body - in: body - type: object - properties: - userId: - type: string - example: - reportName: - type: string - example: - reportBody: - type: object -responses: - 201: - description: A successful creation of monthly report - schema: - type: object - properties: - status: - type: string - example: success - message: - type: string - example: Monthly Report Saved Successfully diff --git a/src/insights/api/middlewares/__init__.py b/src/insights/api/middlewares/__init__.py deleted file mode 100644 index 8fb943331b..0000000000 --- a/src/insights/api/middlewares/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from flask import Blueprint - -middleware_blueprint = Blueprint("middleware_blueprint", __name__) diff --git a/src/insights/api/middlewares/base_validator.py b/src/insights/api/middlewares/base_validator.py deleted file mode 100644 index 9cad9f4d3e..0000000000 --- a/src/insights/api/middlewares/base_validator.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Module for Validation error and error handler""" - - -class ValidationError(Exception): - """Base Validation class for handling validation errors""" - - def __init__(self, error, status_code=None): - Exception.__init__(self) - self.status_code = status_code if status_code else 400 - self.error = error - self.error["status"] = "error" - self.error["message"] = error["message"] - - def to_dict(self): - return self.error diff --git a/src/insights/api/models/__init__.py b/src/insights/api/models/__init__.py deleted file mode 100644 index 952344af97..0000000000 --- a/src/insights/api/models/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .collection import Collection -from .site import SiteModel -from .events import EventsModel -from .exceedance import ExceedanceModel -from .data_export import ( - DataExportModel, - DataExportStatus, - DataExportRequest, -) diff --git a/src/insights/api/models/collection.py b/src/insights/api/models/collection.py deleted file mode 100644 index d2c63f86eb..0000000000 --- a/src/insights/api/models/collection.py +++ /dev/null @@ -1,6 +0,0 @@ -from api.models.base.base_model import BasePyMongoModel - - -class Collection(BasePyMongoModel): - def __init__(self, tenant, collection_name): - super().__init__(tenant, collection_name=collection_name) diff --git a/src/insights/api/models/data_export.py b/src/insights/api/models/data_export.py deleted file mode 100644 index 94577cf605..0000000000 --- a/src/insights/api/models/data_export.py +++ /dev/null @@ -1,300 +0,0 @@ -import json -import traceback -from dataclasses import dataclass, asdict -from datetime import datetime -from enum import Enum - -import pandas as pd -import pymongo -from bson import ObjectId -from bson.errors import InvalidId -from google.cloud import bigquery, storage - -from api.models.base.base_model import BasePyMongoModel -from api.utils.dates import date_to_str -from api.utils.exceptions import ExportRequestNotFound -from config import Config - - -class DataExportStatus(Enum): - SCHEDULED = "scheduled" - PROCESSING = "processing" - READY = "ready" - FAILED = "failed" - NO_DATA = "no_data" - - -class Frequency(Enum): - RAW = "raw" - HOURLY = "hourly" - DAILY = "daily" - - -class DataExportFormat(Enum): - JSON = "json" - CSV = "csv" - - -@dataclass -class DataExportRequest: - status: DataExportStatus - frequency: 
Frequency - export_format: DataExportFormat - - request_date: datetime - start_date: datetime - end_date: datetime - - data_links: list[str] - request_id: str - user_id: str - - sites: list[str] - devices: list[str] - airqlouds: list[str] - pollutants: list[str] - retries: int - meta_data: dict - - def to_dict(self) -> dict: - _dict = asdict(self) - _dict["status"] = self.status.value - _dict["frequency"] = self.frequency.value - _dict["export_format"] = self.export_format.value - return _dict - - def to_api_format(self) -> dict: - _dict = asdict(self) - _dict["status"] = str(self.status.value).replace("_", " ").capitalize() - _dict["frequency"] = self.frequency.value - _dict["export_format"] = self.export_format.value - _dict["request_date"] = date_to_str(self.request_date) - _dict["start_date"] = date_to_str(self.start_date) - _dict["end_date"] = date_to_str(self.end_date) - return _dict - - def destination_file(self) -> str: - return f"{self.user_id}_{date_to_str(self.request_date) }-*.{self.export_format.value}" - - def destination_bucket(self) -> str: - return f"{self.user_id}/{self.request_id}/{date_to_str(self.request_date)}-*.{self.export_format.value}" - - def gcs_folder(self) -> str: - return f"{self.user_id}/{self.request_id}/" - - def gcs_file(self) -> str: - return f"download_*.{self.export_format.value}" - - def bigquery_table(self) -> str: - return f"{self.user_id}_{self.request_id.replace('-', '_')}" - - def export_table_name(self) -> str: - return f"{self.user_id}_{date_to_str(self.request_date)}" - - -class DataExportModel(BasePyMongoModel): - def __init__(self): - super().__init__(collection_name=Config.DATA_EXPORT_COLLECTION, tenant="airqo") - self.bigquery_client = bigquery.Client() - self.cloud_storage_client = storage.Client() - self.dataset = Config.DATA_EXPORT_DATASET - self.project = Config.DATA_EXPORT_GCP_PROJECT - self.bucket = self.cloud_storage_client.get_bucket(Config.DATA_EXPORT_BUCKET) - - @staticmethod - def doc_to_data_export_request(doc) -> DataExportRequest: - return DataExportRequest( - request_id=str(doc["_id"]), - devices=doc["devices"], - start_date=doc["start_date"], - end_date=doc["end_date"], - airqlouds=doc["airqlouds"], - sites=doc["sites"], - data_links=doc["data_links"], - request_date=doc["request_date"], - user_id=doc["user_id"], - status=DataExportStatus[str(doc["status"]).upper()], - frequency=Frequency[str(doc["frequency"]).upper()], - export_format=DataExportFormat[str(doc["export_format"]).upper()], - pollutants=doc["pollutants"], - retries=doc.get("retries", 3), - meta_data=doc.get("meta_data", {}), - ) - - def docs_to_data_export_requests(self, docs: list) -> list[DataExportRequest]: - data: list[DataExportRequest] = [] - for doc in docs: - try: - doc_data = self.doc_to_data_export_request(doc) - data.append(doc_data) - except Exception as ex: - print(ex) - traceback.print_exc() - - return data - - def create_request(self, request: DataExportRequest): - self.collection.insert_one(request.to_dict()) - - def get_scheduled_and_failed_requests(self) -> list[DataExportRequest]: - filter_set = { - "$or": [ - {"status": {"$eq": DataExportStatus.SCHEDULED.value}}, - { - "$and": [ - {"status": {"$eq": DataExportStatus.FAILED.value}}, - {"retires": {"$gt": 0}}, - ] - }, - ] - } - - docs = self.collection.find(filter_set) - return self.docs_to_data_export_requests(docs) - - def update_request_status_and_retries(self, request: DataExportRequest) -> bool: - try: - data = request.to_dict() - filter_set = {"_id": ObjectId(f"{request.request_id}")} 
- update_set = { - "$set": {"status": data["status"], "retries": data["retries"]} - } - result = self.collection.update_one(filter_set, update_set) - return result.modified_count == 1 - except Exception as ex: - print(ex) - return False - - def update_request_status_and_data_links(self, request: DataExportRequest) -> bool: - try: - filter_set = {"_id": ObjectId(f"{request.request_id}")} - data = request.to_dict() - update_set = { - "$set": { - "status": data["status"], - "data_links": data["data_links"], - } - } - result = self.collection.update_one(filter_set, update_set) - return result.modified_count == 1 - except Exception as ex: - print(ex) - return False - - def get_user_requests(self, user_id: str) -> list[DataExportRequest]: - docs = self.collection.find({"user_id": user_id}).sort( - "request_date", pymongo.DESCENDING - ) - return self.docs_to_data_export_requests(docs) - - def get_request_by_id(self, request_id: str) -> DataExportRequest: - try: - filter_set = {"_id": ObjectId(request_id)} - except InvalidId: - raise ExportRequestNotFound(request_id=request_id) - - doc = self.collection.find_one(filter_set) - if not doc: - raise ExportRequestNotFound(request_id=request_id) - return self.doc_to_data_export_request(doc) - - def export_table_to_gcs(self, export_request: DataExportRequest): - blobs = self.bucket.list_blobs(prefix=export_request.gcs_folder()) - for blob in blobs: - blob.delete() - - destination_uri = f"https://storage.cloud.google.com/{self.bucket.name}/{export_request.gcs_folder()}{export_request.gcs_file()}" - extract_job_config = bigquery.job.ExtractJobConfig() - if export_request.export_format == DataExportFormat.CSV: - extract_job_config.destination_format = bigquery.DestinationFormat.CSV - elif export_request.export_format == DataExportFormat.JSON: - extract_job_config.destination_format = ( - bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON - ) - - extract_job_config.print_header = True - extract_job = self.bigquery_client.extract_table( - f"{self.dataset}.{export_request.bigquery_table()}", - destination_uri, - job_config=extract_job_config, - location="EU", - ) - - extract_job.result() - - def get_data_links(self, export_request: DataExportRequest) -> [str]: - blobs = self.bucket.list_blobs(prefix=export_request.gcs_folder()) - return [ - f"https://storage.cloud.google.com/{self.bucket.name}/{blob.name}" - for blob in blobs - if not blob.name.endswith("/") - ] - - def has_data(self, query) -> bool: - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = True - total_rows = ( - bigquery.Client() - .query(f"select * from ({query}) limit 1", job_config) - .result() - .total_rows - ) - return total_rows > 0 - - def export_query_results_to_table(self, query, export_request: DataExportRequest): - job_config = bigquery.QueryJobConfig( - destination=f"{self.project}.{self.dataset}.{export_request.bigquery_table()}" - ) - job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - job = self.bigquery_client.query(query, job_config=job_config) - job.result() - - def upload_file_to_gcs( - self, contents: pd.DataFrame, export_request: DataExportRequest - ) -> str: - blob = self.bucket.blob(export_request.destination_file()) - - contents.reset_index(drop=True, inplace=True) - if export_request.export_format == DataExportFormat.CSV: - blob.upload_from_string( - data=contents.to_csv(index=False), - content_type="text/csv", - timeout=300, - num_retries=2, - ) - elif export_request.export_format == DataExportFormat.JSON: - data = 
contents.to_dict("records") - blob.upload_from_string( - data=json.dumps(data), - content_type="application/json", - timeout=300, - num_retries=2, - ) - - return f"https://storage.cloud.google.com/{self.bucket.name}/{export_request.destination_file()}" - - def export_query_results_to_gcs(self, query, export_request: DataExportRequest): - destination_uri = f"https://storage.cloud.google.com/{self.bucket.name}/{export_request.destination_file()}.gz" - - job_config = bigquery.QueryJobConfig() - extract_job_config = bigquery.job.ExtractJobConfig() - extract_job_config.destination_format = bigquery.DestinationFormat.CSV - extract_job_config.compression = bigquery.Compression.GZIP - extract_job_config.print_header = True - - job = self.bigquery_client.query(query, job_config=job_config) - job.result() - - destination_table = job.destination - - extract_job = self.bigquery_client.extract_table( - destination_table, - destination_uri, - job_config=extract_job_config, - location="US", - ) - - extract_job.result() - - print(f"Query results exported to {destination_uri}") - return destination_uri diff --git a/src/insights/api/utils/request_validators.py b/src/insights/api/utils/request_validators.py deleted file mode 100644 index 608c2522f3..0000000000 --- a/src/insights/api/utils/request_validators.py +++ /dev/null @@ -1,235 +0,0 @@ -from collections import OrderedDict -from dataclasses import dataclass -from datetime import datetime -from functools import wraps -import re - -# third-party imports -from flask import request - -from api.utils.http import Status -from api.utils.pollutants import PM_25_CATEGORY - - -@dataclass -class ValidatorResult: - is_valid: bool - error_msg: str - - -@dataclass -class Rule: - key: str - validators: list - - -class Validator(OrderedDict): - """A custom orderedDict class to ensure the inorder in which the validators are called is preserved""" - - EMAIL_REGEX = re.compile( - r"^[\-a-zA-Z0-9_]+(\.[\-a-zA-Z0-9_]+)*@[\-a-z]+\.[\-a-zA-Z0-9_]+\Z", - re.I | re.UNICODE, - ) - - URL_REGEX = re.compile( - r"^(http(s)?:\/\/)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)$" - ) - - DATA_TYPES = ["csv", "json"] - - PM_CATEGORIES = PM_25_CATEGORY.keys() - - POLLUTANTS = ["pm2_5", "pm10", "no2"] - - def __init__(self, validation_type=""): - super().__init__() - self.validation_type = validation_type - self["required"] = self.type_validator(self.none_checker, "is required") - self["optional"] = self.type_validator(self.optional_checker, "is optional") - self["int"] = self.type_validator(int, "{} is not a valid integer") - self["float"] = self.type_validator(float, "{} is not a valid float") - self["list"] = self.type_validator( - self.list_checker, "{} is not a valid list/array" - ) - self["bool"] = self.type_validator( - self.str_to_bool, "{} is not a valid boolean" - ) - self["dict"] = self.type_validator(dict, "{} is not a valid dict") - self["str"] = self.type_validator(str, "{} is not a valid string") - self["date"] = self.type_validator( - self.date_checker, "{} is not a valid date. Date format is '%Y-%M-%D'" - ) - self["datetime"] = self.type_validator( - self.datetime_checker, - "{} is not a valid datetime. Date format is '%Y-%m-%dT%H:%M:%S.%fZ' e.g '2021-02-08T21:00:00.000Z'", - ) - self["email"] = self.type_validator( - self.email_checker, "{} is not a valid email" - ) - self["url"] = self.type_validator(self.url_checker, "{} is not a valid url") - self["data"] = self.type_validator( - self.data_checker, - f"{{}} is not a supported data format. 
Supported data formats are {self.DATA_TYPES}", - ) - self["pmCategory"] = self.type_validator( - self.pm_category_checker, - f"{{}} is not a valid pm category. Valid categories are {self.PM_CATEGORIES}", - ) - - def type_validator(self, type_checker, error_msg): - def validator(value): - try: - type_checker(value) - return ValidatorResult(is_valid=True, error_msg="") - except (ValueError, TypeError): - return ValidatorResult( - is_valid=False, - error_msg=f"{error_msg.format(value)} (in {self.validation_type})", - ) - - return validator - - @staticmethod - def list_checker(value): - if not isinstance(value, list): - raise TypeError("not a valid list") - - @staticmethod - def str_to_bool(value): - if str(value).lower() in ["true", "1", "t", "y", "yes"]: - return True - if str(value).lower() in ["false", "0", "f", "n", "no"]: - return False - - raise (TypeError("cannot convert {} to bool type".format(value))) - - @staticmethod - def date_checker(value): - try: - datetime.strptime(value, "%Y-%m-%d") - except Exception: - raise TypeError("cannot convert {} to date type".format(value)) - - @staticmethod - def datetime_checker(value): - try: - datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ") - except Exception: - raise TypeError("cannot convert {} to datetime type".format(value)) - - @staticmethod - def none_checker(value): - if not value: - raise TypeError("value can not be none/falsy") - - @staticmethod - def optional_checker(value): - pass - - @classmethod - def email_checker(cls, value): - if not cls.EMAIL_REGEX.match(value): - raise TypeError("invalid email address") - - @classmethod - def url_checker(cls, value): - if not re.match(cls.URL_REGEX, value): - raise TypeError("invalid url") - - @classmethod - def data_checker(cls, value): - if value not in cls.DATA_TYPES: - raise TypeError("invalid data type") - - @classmethod - def pm_category_checker(cls, value): - if value not in cls.PM_CATEGORIES: - raise TypeError("invalid pm category type") - - @classmethod - def pollutant_checker(cls, value): - if value not in cls.POLLUTANTS: - raise TypeError("invalid pollutant type") - - @staticmethod - def parse_rule(rule): - """ - Splits into key and validators components - Args: - rule: a str representing a validation rule e.g "email|required:email" - - Returns: a dataclass holding the key and validators extracted from the rule - - """ - try: - key, validators = rule.split("|") - return Rule(key=key, validators=validators.split(":")) - except Exception as exc: - print(exc) - raise Exception("Invalid rule") - - -def request_validator_factory(input_source, info): - """ - Factory function for creating request validator decorators - Args: - input_source: func that request data input source (request.args or request.get_json() - info: message to be appended in the error messages - - Returns: request decorator - - """ - - def validate_request(*rules): - """decorator for validating query params - :rules: list - The sets of keys and rules to be applied - example: [username|required:str, email|required:email] - """ - - def validate_params(func): - @wraps(func) - def decorated(*args, **kwargs): - source = input_source() or {} - validator = Validator(validation_type=info) - errors = {} - - for rule in rules: - rule = validator.parse_rule(rule) - - for validation_type in rule.validators: - validation_func = validator.get(validation_type) - value = source.get(rule.key) - v = validation_func(value) - - if validation_type != "required" and not value: - continue - - if not v.is_valid: - errors[rule.key] = 
v.error_msg - break - - if errors: - return { - "status": "error", - "message": "an error occurred while processing this request", - "errors": errors, - }, Status.HTTP_400_BAD_REQUEST - - return func(*args, **kwargs) - - return decorated - - return validate_params - - return validate_request - - -validate_request_params = request_validator_factory( - input_source=lambda: request.args, - info="query parameters", -) - -validate_request_json = request_validator_factory( - input_source=lambda: request.get_json(), - info="json body", -) diff --git a/src/insights/api/views/__init__.py b/src/insights/api/views/__init__.py deleted file mode 100644 index d8b2879ae9..0000000000 --- a/src/insights/api/views/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .dashboard import ( - ExceedancesResource, - ExceedancesResource2, - DailyAveragesResource, - DailyAveragesResource2, - ChartDataResource, - MonitoringSiteResource, -) - -from .data import DataExportResource, DataSummaryResource, DataExportV2Resource diff --git a/src/insights/api/views/data.py b/src/insights/api/views/data.py deleted file mode 100644 index c189e43fa7..0000000000 --- a/src/insights/api/views/data.py +++ /dev/null @@ -1,447 +0,0 @@ -import datetime -import traceback - -import flask_excel as excel -import pandas as pd -from flasgger import swag_from -from flask import request -from flask_restx import Resource - -from api.models import ( - EventsModel, -) -from api.models.data_export import ( - DataExportRequest, - DataExportModel, - DataExportStatus, - DataExportFormat, - Frequency, -) -from api.utils.data_formatters import filter_non_private_entities, Entity - -# Middlewares -from api.utils.data_formatters import ( - format_to_aqcsv, - compute_airqloud_summary, -) -from api.utils.dates import str_to_date, date_to_str -from api.utils.exceptions import ExportRequestNotFound -from api.utils.http import create_response, Status -from api.utils.request_validators import validate_request_json, validate_request_params -from main import rest_api_v2 - - -@rest_api_v2.errorhandler(ExportRequestNotFound) -def batch_not_found_exception(error): - return ( - create_response(error.message, data={}, success=False), - Status.HTTP_400_BAD_REQUEST, - ) - - -@rest_api_v2.route("/data-download") -class DataExportResource(Resource): - @swag_from("/api/docs/dashboard/download_custom_data_post.yml") - @validate_request_json( - "startDateTime|required:datetime", - "endDateTime|required:datetime", - "frequency|optional:str", - "weatherFields|optional:list", - "downloadType|optional:str", - "outputFormat|optional:str", - "pollutants|optional:list", - "sites|optional:list", - "devices|optional:list", - "airqlouds|optional:list", - ) - def post(self): - valid_pollutants = ["pm2_5", "pm10", "no2"] - valid_download_types = ["csv", "json"] - valid_output_formats = ["airqo-standard", "aqcsv"] - valid_frequencies = ["hourly", "daily", "raw"] - - json_data = request.get_json() - - start_date = json_data["startDateTime"] - end_date = json_data["endDateTime"] - sites = filter_non_private_entities( - entities=json_data.get("sites", []), entity_type=Entity.SITES - ) - devices = filter_non_private_entities( - entities=json_data.get("devices", []), entity_type=Entity.DEVICES - ) - airqlouds = json_data.get("airqlouds", []) - pollutants = json_data.get("pollutants", valid_pollutants) - weather_fields = json_data.get("weatherFields", None) - frequency = f"{json_data.get('frequency', valid_frequencies[0])}".lower() - download_type = ( - f"{json_data.get('downloadType', 
valid_download_types[0])}".lower() - ) - output_format = ( - f"{json_data.get('outputFormat', valid_output_formats[0])}".lower() - ) - - if sum([len(sites) == 0, len(devices) == 0, len(airqlouds) == 0]) == 3: - return ( - create_response( - f"Specify either a list of airqlouds, sites or devices in the request body", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if sum([len(sites) != 0, len(devices) != 0, len(airqlouds) != 0]) != 1: - return ( - create_response( - f"You cannot specify airqlouds, sites and devices in one go", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if frequency not in valid_frequencies: - return ( - create_response( - f"Invalid frequency {frequency}. Valid string values are any of {', '.join(valid_frequencies)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if download_type not in valid_download_types: - return ( - create_response( - f"Invalid download type {download_type}. Valid string values are any of {', '.join(valid_download_types)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if output_format not in valid_output_formats: - return ( - create_response( - f"Invalid output format {output_format}. Valid string values are any of {', '.join(valid_output_formats)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - for pollutant in pollutants: - if pollutant not in valid_pollutants: - return ( - create_response( - f"Invalid pollutant {pollutant}. Valid values are {', '.join(valid_pollutants)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - postfix = "-" if output_format == "airqo-standard" else "-aqcsv-" - - try: - data_frame = EventsModel.download_from_bigquery( - sites=sites, - devices=devices, - airqlouds=airqlouds, - start_date=start_date, - end_date=end_date, - frequency=frequency, - pollutants=pollutants, - weather_fields=weather_fields, - ) - - if data_frame.empty: - return ( - create_response("No data found", data=[]), - Status.HTTP_404_NOT_FOUND, - ) - - records = data_frame.to_dict("records") - - if output_format == "aqcsv": - records = format_to_aqcsv( - data=records, frequency=frequency, pollutants=pollutants - ) - - if download_type == "json": - return ( - create_response( - "air-quality data download successful", data=records - ), - Status.HTTP_200_OK, - ) - - return excel.make_response_from_records( - records, "csv", file_name=f"{frequency}-air-quality{postfix}data" - ) - except Exception as ex: - print(ex) - traceback.print_exc() - return ( - create_response( - f"An Error occurred while processing your request. 
Please contact support", - success=False, - ), - Status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -@rest_api_v2.route("/data-export") -class DataExportV2Resource(Resource): - @validate_request_json( - "startDateTime|required:datetime", - "endDateTime|required:datetime", - "userId|required:str", - "frequency|optional:str", - "exportFormat|optional:str", - "outputFormat|optional:str", - "pollutants|optional:list", - "sites|optional:list", - "devices|optional:list", - "airqlouds|optional:list", - "meta_data|optional:dict", - ) - def post(self): - valid_pollutants = ["pm2_5", "pm10", "no2"] - valid_export_formats = ["csv", "json"] - valid_output_formats = ["airqo-standard", "aqcsv"] - valid_frequencies = ["hourly", "daily", "raw"] - - json_data = request.get_json() - - start_date = json_data["startDateTime"] - end_date = json_data["endDateTime"] - meta_data = json_data.get("meta_data", []) - sites = filter_non_private_entities( - entities=json_data.get("sites", []), entity_type=Entity.SITES - ) - devices = filter_non_private_entities( - entities=json_data.get("devices", []), entity_type=Entity.DEVICES - ) - airqlouds = json_data.get("airqlouds", []) - pollutants = json_data.get("pollutants", valid_pollutants) - user_id = json_data.get("userId") - frequency = f"{json_data.get('frequency', valid_frequencies[0])}".lower() - export_format = ( - f"{json_data.get('exportFormat', valid_export_formats[0])}".lower() - ) - output_format = ( - f"{json_data.get('outputFormat', valid_output_formats[0])}".lower() - ) - - if len(airqlouds) != 0: - devices = [] - sites = [] - elif len(sites) != 0: - devices = [] - airqlouds = [] - elif len(devices) != 0: - airqlouds = [] - sites = [] - else: - return ( - create_response( - f"Specify either a list of airqlouds, sites or devices in the request body", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if frequency not in valid_frequencies: - return ( - create_response( - f"Invalid frequency {frequency}. Valid string values are any of {', '.join(valid_frequencies)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if export_format not in valid_export_formats: - return ( - create_response( - f"Invalid download type {export_format}. Valid string values are any of {', '.join(valid_export_formats)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - if output_format not in valid_output_formats: - return ( - create_response( - f"Invalid output format {output_format}. Valid string values are any of {', '.join(valid_output_formats)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - for pollutant in pollutants: - if pollutant not in valid_pollutants: - return ( - create_response( - f"Invalid pollutant {pollutant}. 
Valid values are {', '.join(valid_pollutants)}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - try: - data_export_model = DataExportModel() - data_export_request = DataExportRequest( - airqlouds=airqlouds, - start_date=str_to_date(start_date), - end_date=str_to_date(end_date), - sites=sites, - status=DataExportStatus.SCHEDULED, - data_links=[], - request_date=datetime.datetime.utcnow(), - user_id=user_id, - frequency=Frequency[frequency.upper()], - export_format=DataExportFormat[export_format.upper()], - devices=devices, - request_id="", - pollutants=pollutants, - retries=3, - meta_data=meta_data, - ) - - data_export_request.status = DataExportStatus.SCHEDULED - data_export_model.create_request(data_export_request) - - return ( - create_response( - "request successfully received", - data=data_export_request.to_api_format(), - ), - Status.HTTP_200_OK, - ) - - except Exception as ex: - print(ex) - traceback.print_exc() - return ( - create_response( - f"An Error occurred while processing your request. Please contact support", - success=False, - ), - Status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - @validate_request_params( - "userId|required:str", - ) - def get(self): - user_id = request.args.get("userId") - try: - data_export_model = DataExportModel() - requests = data_export_model.get_user_requests(user_id) - - data = [x.to_api_format() for x in requests] - - return ( - create_response( - "request successfully received", - data=data, - ), - Status.HTTP_200_OK, - ) - - except Exception as ex: - print(ex) - traceback.print_exc() - return ( - create_response( - f"An Error occurred while processing your request. Please contact support", - success=False, - ), - Status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - @validate_request_params( - "requestId|required:str", - ) - def patch(self): - request_id = request.args.get("requestId") - data_export_model = DataExportModel() - export_request = data_export_model.get_request_by_id(request_id) - export_request.status = DataExportStatus.SCHEDULED - export_request.retries = 3 - success = data_export_model.update_request_status_and_retries(export_request) - if success: - return ( - create_response( - "request successfully updated", - data=export_request.to_api_format(), - ), - Status.HTTP_200_OK, - ) - else: - return ( - create_response( - f"An Error occurred while processing your request. 
Please contact support", - success=False, - ), - Status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - -@rest_api_v2.route("/data/summary") -class DataSummaryResource(Resource): - @validate_request_json( - "startDateTime|required:datetime", - "endDateTime|required:datetime", - "airqloud|optional:str", - "cohort|optional:str", - "grid|optional:str", - ) - def post(self): - try: - json_data = request.get_json() - - start_date_time = str_to_date(json_data["startDateTime"]) - end_date_time = str_to_date(json_data["endDateTime"]) - airqloud = str(json_data.get("airqloud", "")) - cohort = str(json_data.get("cohort", "")) - grid = str(json_data.get("grid", "")) - - start_date_time = date_to_str(start_date_time, format="%Y-%m-%dT%H:00:00Z") - end_date_time = date_to_str(end_date_time, format="%Y-%m-%dT%H:00:00Z") - data = EventsModel.get_devices_summary( - airqloud=airqloud, - start_date_time=start_date_time, - end_date_time=end_date_time, - grid=grid, - cohort=cohort, - ) - - summary = compute_airqloud_summary( - data=pd.DataFrame(data), - start_date_time=start_date_time, - end_date_time=end_date_time, - ) - - if len(summary) == 0: - return ( - create_response( - f"No data found for grid {grid} from {start_date_time} to {end_date_time}", - data={}, - success=False, - ), - Status.HTTP_200_OK, - ) - - return ( - create_response("successful", data=summary), - Status.HTTP_200_OK, - ) - - except Exception as ex: - print(ex) - traceback.print_exc() - return ( - create_response( - "An Error occurred while processing your request. Please contact support", - data={}, - success=False, - ), - Status.HTTP_500_INTERNAL_SERVER_ERROR, - ) diff --git a/src/insights/api/views/report.py b/src/insights/api/views/report.py deleted file mode 100644 index 444aa0cc16..0000000000 --- a/src/insights/api/views/report.py +++ /dev/null @@ -1,272 +0,0 @@ -from datetime import datetime - -# Third-party libraries -from flasgger import swag_from -from flask import request -from flask_restx import Resource - -# models -from api.models import ReportTemplateModel -from api.models.base.data_processing import air_quality_data -from api.utils.case_converters import camel_to_snake -from api.utils.http import create_response, Status - -# Utils -from api.utils.request_validators import validate_request_params, validate_request_json - -# Middlewares -from main import rest_api_v2 - - -@rest_api_v2.route("/report/default_template") -class DefaultReportTemplateResource(Resource): - @swag_from("/api/docs/report/default_report_template_post.yml") - @validate_request_json( - "userId|required:str", "reportName|required:str", "reportBody|required:dict" - ) - def post(self): - tenant = request.args.get("tenant") - - data = request.get_json() - user_id = data["userId"] - report_name = data["reportName"] - report_body = data["reportBody"] - - report_model = ReportTemplateModel(tenant) - - count = report_model.find({"report_type": "default"}).count() - - if count > 0: - return ( - create_response("A default template already exist", success=False), - Status.HTTP_400_BAD_REQUEST, - ) - - report_model.insert( - { - "user_id": user_id, - "report_date": datetime.now(), - "report_type": "default", - "report_name": report_name, - "report_body": report_body, - } - ) - - return ( - create_response("Default Report Template Saved Successfully"), - Status.HTTP_201_CREATED, - ) - - @swag_from("/api/docs/report/default_report_template_get.yml") - def get(self): - tenant = request.args.get("tenant") - report_model = ReportTemplateModel(tenant) - - default_template = 
list( - report_model.filter_by(report_type="default").exec( - { - "_id": 1, - "user_id": 1, - "report_date": { - "$dateToString": { - "format": "%Y-%m-%dT%H:%M:%S%z", - "date": "$time", - "timezone": "Africa/Kampala", - }, - }, - "report_type": 1, - "report_name": 1, - "report_body": 1, - } - ) - ) - - report = default_template[0] if default_template else {} - - return ( - create_response( - "default report successfully fetched", data={"report": report} - ), - Status.HTTP_200_OK, - ) - - @swag_from("/api/docs/report/default_report_template_patch.yml") - @validate_request_json("userId|str", "reportName|str", "reportBody|dict") - def patch(self): - tenant = request.args.get("tenant") - - data = request.get_json() - - update_fields = {} - - valid_keys = ["userId", "reportName", "reportBody"] - - for key, value in data.items(): - if key in valid_keys: - update_fields[camel_to_snake(key)] = value - - if not update_fields: - return { - "message": f"the update fields is empty. valid keys are {valid_keys}" - }, Status.HTTP_400_BAD_REQUEST - - report_model = ReportTemplateModel(tenant) - - update_result = report_model.update_one( - filter_cond={"report_type": "default"}, update_fields=update_fields - ) - - if update_result.modified_count > 0 or update_result.matched_count > 0: - return ( - create_response("default reporting template updated successfully"), - Status.HTTP_202_ACCEPTED, - ) - - return ( - create_response("could not update default template", success=False), - Status.HTTP_404_NOT_FOUND, - ) - - -@rest_api_v2.route("/report/monthly") -class MonthlyReportResource(Resource): - @swag_from("/api/docs/report/monthly_report_post.yml") - @validate_request_json( - "userId|required:str", "reportName|required:str", "reportBody|required:dict" - ) - def post(self): - tenant = request.args.get("tenant") - - data = request.get_json() - user_id = data["userId"] - report_name = data["reportName"] - report_body = data["reportBody"] - - report_model = ReportTemplateModel(tenant) - - report_model.insert( - { - "user_id": user_id, - "report_date": datetime.now(), - "report_name": report_name, - "report_body": report_body, - } - ) - - return ( - create_response("Monthly Report Saved Successfully"), - Status.HTTP_201_CREATED, - ) - - @swag_from("/api/docs/report/monthly_report_get.yml") - @validate_request_params("userId|required:str") - def get(self): - tenant = request.args.get("tenant") - user_id = request.args.get("userId") - - report_model = ReportTemplateModel(tenant) - - report = list( - report_model.filter_by(user_id=user_id).exec( - { - "_id": 1, - "user_id": 1, - "report_date": { - "$dateToString": { - "format": "%Y-%m-%dT%H:%M:%S%z", - "date": "$time", - "timezone": "Africa/Kampala", - }, - }, - "report_type": 1, - "report_name": 1, - "report_body": 1, - } - ) - ) - - if report: - return ( - create_response( - "reports successfully fetched", data={"reports": report} - ), - Status.HTTP_200_OK, - ) - - return ( - create_response("report(s) not found", success=False), - Status.HTTP_404_NOT_FOUND, - ) - - -@rest_api_v2.route("/report/monthly/") -class MonthlyReportExtraResource(Resource): - @swag_from("/api/docs/report/monthly_report_extra_post.yml") - @validate_request_json("userId|str", "reportName|str", "reportBody|dict") - def post(self, report_name): - tenant = request.args.get("tenant") - data = request.get_json() or {} - - update_fields = {} - - valid_keys = ["userId", "reportName", "reportBody"] - - for key, value in data.items(): - if key in valid_keys: - update_fields[camel_to_snake(key)] = 
value - - if not update_fields: - return ( - create_response( - f"the update fields is empty. valid keys are {valid_keys}", - success=False, - ), - Status.HTTP_400_BAD_REQUEST, - ) - - report_model = ReportTemplateModel(tenant) - - update_result = report_model.update_one( - filter_cond={"report_name": report_name}, update_fields=update_fields - ) - - if update_result.modified_count > 0 or update_result.matched_count > 0: - return ( - create_response("report updated successfully"), - Status.HTTP_202_ACCEPTED, - ) - - return ( - create_response("report not found", success=False), - Status.HTTP_404_NOT_FOUND, - ) - - @swag_from("/api/docs/report/monthly_report_extra_delete.yml") - def delete(self, report_name): - tenant = request.args.get("tenant") - - report_model = ReportTemplateModel(tenant) - - delete_result = report_model.delete_one({"report_name": report_name}) - - if delete_result.deleted_count > 0: - return ( - create_response(f"monthly report {report_name} deleted successfully"), - Status.HTTP_200_OK, - ) - - return ( - create_response("report not found", success=False), - Status.HTTP_404_NOT_FOUND, - ) - - -@rest_api_v2.route("/grid/report") -class GridReportResource(Resource): - @swag_from("/api/docs/report/generate_grid_report_post.yml") - @validate_request_json( - "start_time|required:str", "end_time|required:str", "grid_id|optional:str" - ) - def post(self): - data = request.get_json() - return air_quality_data(data) diff --git a/src/insights/app.py b/src/insights/app.py new file mode 100644 index 0000000000..99f24057a2 --- /dev/null +++ b/src/insights/app.py @@ -0,0 +1,65 @@ +from celery import Celery, Task +from flasgger import Swagger +from flask import Flask, jsonify, send_from_directory +from flask_caching import Cache +from flask_cors import CORS +from flask_excel import init_excel +from flask_restx import Api +from werkzeug.middleware.proxy_fix import ProxyFix + +from config import CONFIGURATIONS, API_BASE_URL +from config import Config +from namespaces import dashboard_api, data_export_api, auto_report_api + +api = Api( + title="AirQo API", version="1.0", description="AirQo API", prefix=API_BASE_URL +) +cache = Cache() + + +def celery_init_app(app: Flask) -> Celery: + class FlaskTask(Task): + def __call__(self, *args: object, **kwargs: object) -> object: + with app.app_context(): + return self.run(*args, **kwargs) + + celery_app = Celery(app.name, task_cls=FlaskTask) + celery_app.config_from_object(app.config["CELERY"]) + celery_app.set_default() + app.extensions["celery"] = celery_app + return celery_app + + +def create_app(): + app = Flask(__name__) + app.wsgi_app = ProxyFix(app.wsgi_app) + app.config.from_object(CONFIGURATIONS) + app.config["CELERY"] = { + "broker_url": f"{Config.CACHE_REDIS_URL}/0", + "result_backend": f"{Config.CACHE_REDIS_URL}/0", + } + + cache.init_app(app) + init_excel(app) + Swagger(app) + CORS(app) + api.init_app(app) + api.add_namespace(ns=dashboard_api) + api.add_namespace(ns=data_export_api) + api.add_namespace(ns=auto_report_api) + + celery_init_app(app) + + @app.route("/health") + def health(): + return jsonify(dict(message="App status - OK.")) + + @app.route("/docs") + def docs(): + return send_from_directory(directory="docs/", path="status.yml") + + return app + + +flask_app = create_app() +celery_app = flask_app.extensions["celery"] diff --git a/src/insights/celery_app.py b/src/insights/celery_app.py deleted file mode 100644 index 32522eaa2b..0000000000 --- a/src/insights/celery_app.py +++ /dev/null @@ -1,120 +0,0 @@ -import logging 
-import traceback -from datetime import timedelta - -from celery.utils.log import get_task_logger -from flask import Flask -from flask_caching import Cache -from flask_pymongo import PyMongo - -from celery import Celery - -from config import Config, CONFIGURATIONS -from api.models import DataExportModel, DataExportStatus, DataExportRequest, EventsModel - -celery_logger = get_task_logger(__name__) -_logger = logging.getLogger(__name__) - -# db initialization -mongo = PyMongo() -cache = Cache() - - -def create_app(): - application = Flask(__name__) - application.config.from_object(CONFIGURATIONS) - mongo.init_app(application) - cache.init_app(application) - return application - - -def make_celery(): - config = { - "broker_url": f"{Config.CACHE_REDIS_URL}/0", - "result_backend": f"{Config.CACHE_REDIS_URL}/0", - "task_default_queue": "analytics", - "beat_schedule": { - "data_export_periodic_task": { - "task": "data_export_periodic_task", - "schedule": timedelta(seconds=5), - } - }, - "app_name": "data_export", - } - - celery_application = Celery(config["app_name"], broker=config["broker_url"]) - celery_application.conf.update(config) - return celery_application - - -celery = make_celery() - - -@celery.task(name="data_export_periodic_task") -def data_export_task(): - celery_logger.info("Data export periodic task running") - - data_export_model = DataExportModel() - pending_requests = data_export_model.get_scheduled_and_failed_requests() - - if len(pending_requests) == 0: - celery_logger.info("No data for processing") - return - else: - celery_logger.info(f"Commenced processing {len(pending_requests)} request(s)") - - requests_for_processing: [DataExportRequest] = [] - - for request in pending_requests: - request.status = DataExportStatus.PROCESSING - success = data_export_model.update_request_status_and_retries(request) - if success: - requests_for_processing.append(request) - - for request in requests_for_processing: - try: - query = EventsModel.data_export_query( - sites=request.sites, - devices=request.devices, - airqlouds=request.airqlouds, - start_date=request.start_date, - end_date=request.end_date, - frequency=request.frequency.value, - pollutants=request.pollutants, - ) - - has_data = data_export_model.has_data(query) - - if not has_data: - request.status = DataExportStatus.NO_DATA - data_export_model.update_request_status_and_retries(request) - continue - - data_export_model.export_query_results_to_table( - query=query, export_request=request - ) - data_export_model.export_table_to_gcs(export_request=request) - data_links: [str] = data_export_model.get_data_links(export_request=request) - - request.data_links = data_links - request.status = DataExportStatus.READY - - success = data_export_model.update_request_status_and_data_links(request) - - if not success: - raise Exception("Update failed") - - except Exception as ex: - print(ex) - traceback.print_exc() - request.status = DataExportStatus.FAILED - request.retries = request.retries - 1 - data_export_model.update_request_status_and_retries(request) - - celery_logger.info( - f"Finished processing {len(requests_for_processing)} request(s)" - ) - - -if __name__ == "__main__": - data_export_task() diff --git a/src/insights/config.py b/src/insights/config.py index 180341f675..c967d5c712 100644 --- a/src/insights/config.py +++ b/src/insights/config.py @@ -1,9 +1,9 @@ import os - from datetime import datetime from pathlib import Path -from dotenv import load_dotenv + from decouple import config as env_var +from dotenv import load_dotenv from 
flasgger import LazyString env_path = Path(".") / ".env" @@ -11,22 +11,19 @@ TWO_HOURS = 7200 # seconds -API_V2_BASE_URL = "/api/v2/insights" +API_BASE_URL = "/api/v2/insights" APP_ENV = env_var("FLASK_ENV", "production") class Config: - DEBUG = False - TESTING = False - CSRF_ENABLED = True SECRET_KEY = env_var("SECRET_KEY") AIRQO_API_BASE_URL = env_var("AIRQO_API_BASE_URL") GRID_URL = os.getenv("GRID_URL_ID") CACHE_TYPE = "RedisCache" CACHE_DEFAULT_TIMEOUT = TWO_HOURS - CACHE_KEY_PREFIX = f"Analytics-{APP_ENV}" + CACHE_KEY_PREFIX = f"insights-{APP_ENV}" CACHE_REDIS_HOST = env_var("REDIS_SERVER") CACHE_REDIS_PORT = env_var("REDIS_PORT") CACHE_REDIS_URL = f"redis://{env_var('REDIS_SERVER')}:{env_var('REDIS_PORT')}" @@ -61,8 +58,8 @@ class Config: SWAGGER = { "swagger": "2.0", "info": { - "title": "Analytics API", - "description": "API docs for analytics AirQO microservice", + "title": "insights API", + "description": "API docs for insights AirQO microservice", "version": "0.0.1", }, "schemes": ["http", "https"], @@ -76,7 +73,7 @@ class Config: "ui_params_text": """{ "operationsSorter" : (a, b) => a.get("path").localeCompare(b.get("path")) }""", - "url_prefix": f"{API_V2_BASE_URL}", + "url_prefix": f"{API_BASE_URL}", } @@ -89,8 +86,8 @@ class ProductionConfig(Config): class DevelopmentConfig(Config): - DEVELOPMENT = True DEBUG = True + TESTING = True MONGO_URI = env_var("MONGO_LOCAL_URI") DB_NAME = env_var("MONGO_DEV") BIGQUERY_EVENTS = env_var("BIGQUERY_EVENTS_STAGE") @@ -98,8 +95,8 @@ class DevelopmentConfig(Config): class TestingConfig(Config): - TESTING = True DEBUG = True + TESTING = True MONGO_URI = env_var("MONGO_GCE_URI") DB_NAME = env_var("MONGO_STAGE") BIGQUERY_EVENTS = env_var("BIGQUERY_EVENTS_STAGE") @@ -112,7 +109,4 @@ class TestingConfig(Config): "staging": TestingConfig, "production": ProductionConfig, } - -print(f"app running - {APP_ENV.upper()} mode") - CONFIGURATIONS = config[APP_ENV] diff --git a/src/insights/devices_summary.py b/src/insights/devices_summary.py deleted file mode 100644 index 4735baecd6..0000000000 --- a/src/insights/devices_summary.py +++ /dev/null @@ -1,38 +0,0 @@ -import copy -from datetime import datetime, timedelta - -from api.models import EventsModel -from api.utils.data_formatters import compute_devices_summary -from api.utils.dates import str_to_date -from config import Config - - -def create_date_list(start_date: datetime, end_date: datetime): - date_list: list[datetime] = [] - current_date = start_date - - while current_date <= end_date: - date_list.append(current_date) - current_date += timedelta(days=1) - - return date_list - - -def compute_historical_summary(): - model = EventsModel("airqo") - start_date = str_to_date("2023-01-01", format="%Y-%m-%d") - end_date = str_to_date("2023-01-10", format="%Y-%m-%d") - date_list = create_date_list(start_date, end_date) - for date in date_list: - date_data = model.get_devices_hourly_data(day=date) - date_summary = compute_devices_summary(copy.deepcopy(date_data)) - model.save_devices_summary_data(copy.deepcopy(date_summary)) - - -if __name__ == "__main__": - events_model = EventsModel("airqo") - data = events_model.get_devices_hourly_data( - day=datetime.utcnow() - timedelta(days=int(Config.DATA_SUMMARY_DAYS_INTERVAL)) - ) - summary = compute_devices_summary(copy.deepcopy(data)) - events_model.save_devices_summary_data(copy.deepcopy(summary)) diff --git a/src/insights/docker-compose.yaml b/src/insights/docker-compose.yaml new file mode 100644 index 0000000000..1014018aec --- /dev/null +++ 
b/src/insights/docker-compose.yaml @@ -0,0 +1,27 @@ +services: + redis: + image: redis + healthcheck: + test: [ "CMD", "redis-cli","ping" ] + interval: 5s + timeout: 15s + retries: 3 + ports: + - "6379:6379" + + flask: + build: + context: . + target: dev + volumes: + - .:/app + ports: + - "5000:5000" + + celery: + build: + context: . + target: celery + depends_on: + redis: + condition: service_healthy \ No newline at end of file diff --git a/src/insights/api/docs/dashboard/customised_chart_post.yml b/src/insights/docs/dashboard/customised_chart_post.yml similarity index 100% rename from src/insights/api/docs/dashboard/customised_chart_post.yml rename to src/insights/docs/dashboard/customised_chart_post.yml diff --git a/src/insights/api/docs/dashboard/d3_chart_data_post.yml b/src/insights/docs/dashboard/d3_chart_data_post.yml similarity index 100% rename from src/insights/api/docs/dashboard/d3_chart_data_post.yml rename to src/insights/docs/dashboard/d3_chart_data_post.yml diff --git a/src/insights/api/docs/dashboard/device_daily_measurements_get.yml b/src/insights/docs/dashboard/device_daily_measurements_get.yml similarity index 100% rename from src/insights/api/docs/dashboard/device_daily_measurements_get.yml rename to src/insights/docs/dashboard/device_daily_measurements_get.yml diff --git a/src/insights/api/docs/dashboard/download_custom_data_post.yml b/src/insights/docs/dashboard/download_custom_data_post.yml similarity index 100% rename from src/insights/api/docs/dashboard/download_custom_data_post.yml rename to src/insights/docs/dashboard/download_custom_data_post.yml diff --git a/src/insights/api/docs/dashboard/exceedances_location_get.yml b/src/insights/docs/dashboard/exceedances_location_get.yml similarity index 100% rename from src/insights/api/docs/dashboard/exceedances_location_get.yml rename to src/insights/docs/dashboard/exceedances_location_get.yml diff --git a/src/insights/api/docs/dashboard/exceedances_post.yml b/src/insights/docs/dashboard/exceedances_post.yml similarity index 100% rename from src/insights/api/docs/dashboard/exceedances_post.yml rename to src/insights/docs/dashboard/exceedances_post.yml diff --git a/src/insights/api/docs/dashboard/monitoring_site_get.yml b/src/insights/docs/dashboard/monitoring_site_get.yml similarity index 100% rename from src/insights/api/docs/dashboard/monitoring_site_get.yml rename to src/insights/docs/dashboard/monitoring_site_get.yml diff --git a/src/insights/api/docs/report/generate_grid_report_post.yml b/src/insights/docs/report/generate_grid_report_post.yml similarity index 100% rename from src/insights/api/docs/report/generate_grid_report_post.yml rename to src/insights/docs/report/generate_grid_report_post.yml diff --git a/src/insights/api/docs/status.yml b/src/insights/docs/status.yml similarity index 100% rename from src/insights/api/docs/status.yml rename to src/insights/docs/status.yml diff --git a/src/insights/jobs/reports/Dockerfile b/src/insights/jobs/reports/Dockerfile deleted file mode 100644 index 279bea3a04..0000000000 --- a/src/insights/jobs/reports/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM python:3.7-slim as base - -RUN apt update -RUN apt install libgomp1 - -COPY . 
/app/ -WORKDIR /app/ - -# Install and cache requirements for faster builds -RUN pip install --upgrade pip -RUN pip install -r requirements.txt - -# Production -FROM base as production -ENV ENV=production -CMD ["python", "main.py"] - -# Staging -FROM base as staging -ENV ENV=staging -CMD ["python", "main.py"] - -# development -FROM base as development -ENV ENV=development - -CMD ["python", "main.py"] \ No newline at end of file diff --git a/src/insights/jobs/reports/__init__.py b/src/insights/jobs/reports/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/insights/jobs/reports/config.py b/src/insights/jobs/reports/config.py deleted file mode 100644 index 4297c0acb7..0000000000 --- a/src/insights/jobs/reports/config.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -from datetime import datetime -import pandas as pd - -# from pymongo import MongoClient -from dotenv import load_dotenv -from pathlib import Path - -BASE_DIR = Path(__file__).resolve().parent -dotenv_path = os.path.join(BASE_DIR, ".env") -load_dotenv(dotenv_path) - - -class Config: - DEBUG = False - TESTING = False - CSRF_ENABLED = True - CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") - GOOGLE_CLOUD_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT_ID") - - TENANT = os.getenv("TENANT", "airqo") - MONTHS_OF_DATA = os.getenv("MONTHS_OF_DATA", 3) - EXPECTED_DAYS = os.getenv("EXPECTED_DAYS") - BIGQUERY_SITES = os.getenv("BIGQUERY_SITES") - - -class ProductionConfig(Config): - MAIL_SENDER_EMAILADDRESS = os.getenv("MAIL_SENDER_EMAILADDRESS") - MAIL_SENDER_PASSWORD = os.getenv("MAIL_SENDER_PASSWORD") - MAIL_RECEIVER_EMAILADDRESS = os.getenv("MAIL_RECEIVER_EMAILADDRESS") - MAIL_SUBJECT = os.getenv("MAIL_SUBJECT") - AIRQO_QUARTERLY_REPORT_BUCKET = os.getenv("AIRQO_REPORTS_BUCKET_PROD") - BIGQUERY_EVENTS = os.getenv("BIGQUERY_EVENTS_PROD") - BIGQUERY_SITES_METADATA = os.getenv("BIGQUERY_SITES_METADATA_PROD") - - -class TestingConfig(Config): - DEBUG = True - TESTING = True - MAIL_SENDER_EMAILADDRESS = os.getenv("MAIL_SENDER_EMAILADDRESS") - MAIL_SENDER_PASSWORD = os.getenv("MAIL_SENDER_PASSWORD") - MAIL_RECEIVER_EMAILADDRESS = os.getenv("MAIL_RECEIVER_EMAILADDRESS") - MAIL_SUBJECT = os.getenv("MAIL_SUBJECT") - AIRQO_QUARTERLY_REPORT_BUCKET = os.getenv("AIRQO_REPORTS_BUCKET_STAGE") - BIGQUERY_EVENTS = os.getenv("BIGQUERY_EVENTS_STAGE") - BIGQUERY_SITES_METADATA = os.getenv("BIGQUERY_SITES_METADATA_STAGE") - EXPECTED_DAYS = os.getenv("EXPECTED_DAYS") - - -class DevelopmentConfig(Config): - DEVELOPMENT = True - DEBUG = True - MAIL_SENDER_EMAILADDRESS = os.getenv("MAIL_SENDER_EMAILADDRESS") - MAIL_SENDER_PASSWORD = os.getenv("MAIL_SENDER_PASSWORD") - MAIL_RECEIVER_EMAILADDRESS = os.getenv("MAIL_RECEIVER_EMAILADDRESS") - MAIL_SUBJECT = os.getenv("MAIL_SUBJECT") - AIRQO_QUARTERLY_REPORT_BUCKET = os.getenv("AIRQO_REPORTS_BUCKET_STAGE") - BIGQUERY_EVENTS = os.getenv("BIGQUERY_EVENTS_STAGE") - BIGQUERY_SITES_METADATA = os.getenv("BIGQUERY_SITES_METADATA_STAGE") - EXPECTED_DAYS = os.getenv("EXPECTED_DAYS") - - -app_config = { - "development": DevelopmentConfig, - "testing": TestingConfig, - "production": ProductionConfig, - "staging": TestingConfig, -} - -environment = os.getenv("ENV") -print("ENVIRONMENT", environment or "staging") - -configuration = app_config.get(environment, TestingConfig) diff --git a/src/insights/jobs/reports/main.py b/src/insights/jobs/reports/main.py deleted file mode 100644 index 1292256b11..0000000000 --- a/src/insights/jobs/reports/main.py +++ /dev/null @@ -1,939 +0,0 @@ -from google.cloud 
import storage -from google.cloud import bigquery -import pandas as pd -import numpy as np -import seaborn as sns - -sns.set_style("whitegrid") -import matplotlib.pyplot as plt - -plt.rcParams["figure.figsize"] = [14, 6] -import matplotlib.dates as mdates -from docx import Document -from docx.shared import Inches -import io -import ast -import gcsfs -from config import environment, configuration -import os -import joblib -import datetime as dt -from datetime import datetime, timedelta -from utils import ( - upload_trained_model_to_gcs, - date_to_str, - str_to_date_2, - upload_csv_file_to_gcs, - previous_months_range, -) -import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.base import MIMEBase -from email import encoders -import warnings - -warnings.filterwarnings("ignore") - - -BIGQUERY_SITES = configuration.BIGQUERY_SITES -BIGQUERY_SITES_METADATA = configuration.BIGQUERY_SITES_METADATA -BIGQUERY_EVENTS = configuration.BIGQUERY_EVENTS -POLLUTANT_BIGQUERY_MAPPER = { - "pm2_5": ["pm2_5_calibrated_value", "pm2_5_raw_value"], - "pm10": ["pm10_calibrated_value", "pm10_raw_value"], -} - -expected_days = int(configuration.EXPECTED_DAYS) -expected_hourly_records = expected_days * 24 -cutoff_count = 0.75 * expected_hourly_records -revised_cutoff_count = 0.60 * expected_hourly_records - - -def get_city_data(data_): - data_.rename(columns={"datetime": "timestamp"}, inplace=True) - data_["timestamp"] = pd.to_datetime(data_["timestamp"]) - data_["timestamp"] = pd.to_datetime( - data_["timestamp"].dt.strftime("%Y-%m-%d %H:%M:%S") - ) - data_["timestamp"] = data_["timestamp"].apply(lambda x: x.replace(tzinfo=None)) - data_ = data_[data_["pm2_5_calibrated_value"] <= 500] - data_ = data_[data_["pm2_5_calibrated_value"] >= 1] - data_["month"] = data_["timestamp"].dt.month - data_["year"] = data_["timestamp"].dt.year - data_["hour"] = data_["timestamp"].dt.hour - data_["day_of_week"] = data_["timestamp"].dt.strftime("%A") - data_["Month"] = data_["timestamp"].dt.strftime("%b") - data = data_[ - [ - "timestamp", - "site", - "pm2_5_calibrated_value", - "pm2_5_raw_value", - "pm10_calibrated_value", - "pm10_raw_value", - "city", - "month", - "year", - "Month", - ] - ] - return data - - -def get_yearly(filtered_df, col_value="pm2_5_calibrated_value"): - yearly = ( - filtered_df[[col_value, "year"]].groupby(filtered_df["year"]).mean().round(2) - ) - yearly["Year"] = yearly.index - yearly["Year"] = yearly["Year"].astype(str) - return yearly - - -def plot_yearly(yearly, city, col_value="pm2_5_calibrated_value"): - f, a = plt.subplots(figsize=(8, 5)) - yn = sns.barplot(x=yearly.Year, y=col_value, data=yearly, axes=a) - y_axis_label = "PM$_{2.5}$ Concentration(µg/m\N{SUPERSCRIPT THREE})" - plt.ylabel(y_axis_label) - plt.title("Average Yearly PM2.5 Concentration in " + city) - mem_image_file = io.BytesIO() - ww = plt.savefig(mem_image_file, format="png") - plt.close() - return f, mem_image_file - - -def get_monthly(filtered_df, col_value="pm2_5_calibrated_value"): - monthly = ( - filtered_df[[col_value, "Month"]].groupby(filtered_df["Month"]).mean().round(2) - ) - ordered_months = [ - "Jan", - "Feb", - "Mar", - "Apr", - "May", - "Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", - ] - monthly = monthly.reindex(ordered_months) - monthly_ = monthly.reset_index() - return monthly_ - - -def plot_monthly(monthly, city, col_value="pm2_5_calibrated_value"): - figm, axm = plt.subplots(figsize=(10, 6)) - mn = sns.barplot(x=monthly.index, y=col_value, 
data=monthly, axes=axm) - y_axis_label = "PM$_{2.5}$ Concentration(µg/m\N{SUPERSCRIPT THREE})" - plt.ylabel(y_axis_label) - plt.title("Average Monthly PM2.5 Concentration in " + city) - mem_image_file = io.BytesIO() - ww = plt.savefig(mem_image_file, format="png") - plt.close() - return figm, mem_image_file - - -def sort_sites_based_on( - df, index="site", col_values="pm2_5_calibrated_value", order=False -): - df_p = df.pivot_table(index=index, values=col_values) - sorted_df = df_p.reset_index().sort_values(by=col_values, ascending=order) - sorted_df = sorted_df.reset_index().sort_values(by=col_values, ascending=False) - sorted_df.drop("index", axis=1, inplace=True) - return sorted_df.round(2) - - -def visualise_data_(df, device_id, label_value, column_value, ax): - df.sort_values("timestamp", ascending=True) - df2 = df.resample("M").mean().round(2).dropna(axis=0) - date_form = mdates.DateFormatter("%Y-%m") - monthly_interval = mdates.MonthLocator(interval=4) - - ax.plot(df2.index, df2[column_value], label=label_value) - ax.set_title(device_id) - ax.set(xlabel="Date", ylabel=label_value) - ax.xaxis.set_major_locator(monthly_interval) - ax.xaxis.set_major_formatter(date_form) - ax.tick_params(axis="x", labelrotation=60) - plt.legend() - - -def plot_bar_graph(data, city, x_label_rotation=90): - ax = sns.barplot( - x="site", y="pm2_5_calibrated_value", data=data, ci=None, palette="Blues" - ) - plt.title("Average PM$_{2.5}$ Concentration in " + city, fontsize=28) - plt.xlabel("Location ") - plt.ylabel("PM$_{2.5}$ $\mu$ g/m$^3$") - plt.xticks(rotation=x_label_rotation) - plt.show() - - -def reprocess_to_monthly_df(df): - df2 = df.resample("M").mean().round(2).dropna(axis=0).reset_index() - return df2 - - -def reprocess_to_daily_df(df): - df2 = df.resample("D").mean().round(2).dropna(axis=0).reset_index() - return df2 - - -def reprocess_to_annual_df(df): - df_ = df.resample("D").mean().round(2).dropna(axis=0) - df2 = df_.resample("A").mean().round(2).dropna(axis=0).reset_index() - return df2 - - -def convert_AQI_category(row): - if row["pm2_5_calibrated_value"] <= 12.0: - return "Good" - elif row["pm2_5_calibrated_value"] <= 35.4: - return "Moderate" - elif row["pm2_5_calibrated_value"] <= 55.4: - return "Unhealthy for sensitive group" - elif row["pm2_5_calibrated_value"] <= 150.4: - return "Unhealthy" - elif row["pm2_5_calibrated_value"] <= 250.4: - return "Very Unhealthy" - else: - return "Hazardous" - - -def compute_percentage(expected_records, actual_records): - print(expected_records) - print(actual_records) - if actual_records != 0: - return round((actual_records / expected_records) * 100, 2) - else: - return 0 - - -def aggregate_data( - data_unique, - col_value="pm2_5_calibrated_value", - second_col_value="pm2_5_raw_value", - third_col_value="pm10_calibrated_value", - fourth_col_value="pm10_raw_value", - total_expected_records_in_the_timeperiod=2160, -): - device_records = [] - results_dictionary = [] - unique_sites = data_unique.site.unique() - for device in unique_sites: - kd = data_unique[data_unique["site"] == device] - res = ( - kd[[col_value, second_col_value, third_col_value, fourth_col_value]] - .describe() - .round(2) - ) - device_records.append(kd) - overall_percentage_of_available_calibrated_pm2_5 = compute_percentage( - total_expected_records_in_the_timeperiod, - int(res[[col_value]].loc["count"].values[0]), - ) - - overall_percentage_of_available_raw_pm2_5 = compute_percentage( - total_expected_records_in_the_timeperiod, - int(res[[second_col_value]].loc["count"].values[0]), 
- ) - - overall_percentage_of_missing_calibrated_pm2_5 = ( - 100 - overall_percentage_of_available_calibrated_pm2_5 - ) - overall_percentage_of_missing_raw_pm2_5 = ( - 100 - overall_percentage_of_available_raw_pm2_5 - ) - - overall_percentage_of_available_calibrated_pm10 = compute_percentage( - total_expected_records_in_the_timeperiod, - int(res[[third_col_value]].loc["count"].values[0]), - ) - - overall_percentage_of_available_raw_pm10 = compute_percentage( - total_expected_records_in_the_timeperiod, - int(res[[fourth_col_value]].loc["count"].values[0]), - ) - - overall_percentage_of_missing_calibrated_pm10 = ( - 100 - overall_percentage_of_available_calibrated_pm10 - ) - overall_percentage_of_missing_raw_pm10 = ( - 100 - overall_percentage_of_available_raw_pm10 - ) - - dic = { - "site": device, - "calibrated_pm2_5_count": int(res[[col_value]].loc["count"].values[0]), - "avg_calibrated_pm2_5": res[[col_value]].loc["mean"].values[0], - "min_calibrated_pm2_5": res[[col_value]].loc["min"].values[0], - "max_calibrated_pm2_5": res[[col_value]].loc["max"].values[0], - "std_calibrated_pm2_5": res[[col_value]].loc["std"].values[0], - "raw_pm2_5_count": int(res[[second_col_value]].loc["count"].values[0]), - "avg_raw_pm2_5": res[[second_col_value]].loc["mean"].values[0], - "min_raw_pm2_5": res[[second_col_value]].loc["min"].values[0], - "max_raw_pm2_5": res[[second_col_value]].loc["max"].values[0], - "std_raw_pm2_5": res[[second_col_value]].loc["std"].values[0], - "calibrated_pm10_count": int(res[[third_col_value]].loc["count"].values[0]), - "avg_calibrated_pm10": res[[third_col_value]].loc["mean"].values[0], - "min_calibrated_pm10": res[[third_col_value]].loc["min"].values[0], - "max_calibrated_pm10": res[[third_col_value]].loc["max"].values[0], - "std_calibrated_pm10": res[[third_col_value]].loc["std"].values[0], - "raw_pm10_count": int(res[[fourth_col_value]].loc["count"].values[0]), - "avg_raw_pm10": res[[fourth_col_value]].loc["mean"].values[0], - "min_raw_pm10": res[[fourth_col_value]].loc["min"].values[0], - "max_raw_pm10": res[[fourth_col_value]].loc["max"].values[0], - "std_raw_pm10": res[[fourth_col_value]].loc["std"].values[0], - "percentage_of_calibrated_pm2_5": compute_percentage( - int(res[[second_col_value]].loc["count"].values[0]), - int(res[[col_value]].loc["count"].values[0]), - ), - "percentage_of_calibrated_pm10": compute_percentage( - int(res[[fourth_col_value]].loc["count"].values[0]), - int(res[[third_col_value]].loc["count"].values[0]), - ), - "overall_percentage_of_available_calibrated_pm2_5": overall_percentage_of_available_calibrated_pm2_5, - "overall_percentage_of_missing_calibrated_pm2_5": overall_percentage_of_missing_calibrated_pm2_5, - "overall_percentage_of_available_calibrated_pm10": overall_percentage_of_available_calibrated_pm10, - "overall_percentage_of_missing_calibrated_pm10": overall_percentage_of_missing_calibrated_pm10, - "overall_percentage_of_available_raw_pm2_5": overall_percentage_of_available_raw_pm2_5, - "overall_percentage_of_missing_raw_pm2_5": overall_percentage_of_missing_raw_pm2_5, - "overall_percentage_of_available_raw_pm10": overall_percentage_of_available_raw_pm10, - "overall_percentage_of_missing_raw_pm10": overall_percentage_of_missing_raw_pm10, - } - results_dictionary.append(dic) - - final_df = pd.DataFrame(results_dictionary) - final_df.sort_values( - by=["percentage_of_calibrated_pm2_5"], ascending=False, inplace=True - ) - return final_df - - -def get_sites_to_omit_due_to_fewer_observations(final_df, count=2000): - omitted_sites_df = 
final_df[final_df.raw_pm2_5_count <= count] - omitted_sites_df.reset_index(drop=True) - omitted_sites = omitted_sites_df["site"].unique() - print(len(omitted_sites)) - return omitted_sites - - -def filter_sites_to_use(data_unique, omitted_sites): - filtered_df = data_unique[~data_unique["site"].isin(omitted_sites)] - return filtered_df - - -def get_site_summarised_stats(data, expected_hourly_records): - site_summarised_data = aggregate_data( - data, - "pm2_5_calibrated_value", - "pm2_5_raw_value", - "pm10_calibrated_value", - "pm10_raw_value", - expected_hourly_records, - ) - - site_descriptive_stats_calibrated_pm2_5 = site_summarised_data[ - [ - "site", - "avg_calibrated_pm2_5", - "min_calibrated_pm2_5", - "max_calibrated_pm2_5", - "std_calibrated_pm2_5", - ] - ].round(2) - - site_descriptive_stats_calibrated_pm2_5.rename( - columns={ - "site": "site name", - "avg_calibrated_pm2_5": "Mean", - "min_calibrated_pm2_5": "Min", - "max_calibrated_pm2_5": "Max", - "std_calibrated_pm2_5": "STD", - }, - inplace=True, - ) - - site_descriptive_stats_calibrated_pm10 = site_summarised_data[ - [ - "site", - "avg_calibrated_pm10", - "min_calibrated_pm10", - "max_calibrated_pm10", - "std_calibrated_pm10", - ] - ].round(2) - - site_descriptive_stats_calibrated_pm10.rename( - columns={ - "site": "site name", - "avg_calibrated_pm10": "Mean", - "min_calibrated_pm10": "Min", - "max_calibrated_pm10": "Max", - "std_calibrated_pm10": "STD", - }, - inplace=True, - ) - - site_descriptive_stats_raw_pm2_5 = site_summarised_data[ - ["site", "avg_raw_pm2_5", "min_raw_pm2_5", "max_raw_pm2_5", "std_raw_pm2_5"] - ].round(2) - - site_descriptive_stats_raw_pm2_5.rename( - columns={ - "site": "site name", - "avg_raw_pm2_5": "Mean", - "min_raw_pm2_5": "Min", - "max_raw_pm2_5": "Max", - "std_raw_pm2_5": "STD", - }, - inplace=True, - ) - - site_descriptive_stats_raw_pm10 = site_summarised_data[ - ["site", "avg_raw_pm10", "min_raw_pm10", "max_raw_pm10", "std_raw_pm10"] - ].round(2) - - site_descriptive_stats_raw_pm10.rename( - columns={ - "site": "site name", - "avg_raw_pm10": "Mean", - "min_raw_pm10": "Min", - "max_raw_pm10": "Max", - "std_raw_pm10": "STD", - }, - inplace=True, - ) - - sorted_site = sort_sites_based_on( - data, index="site", col_values="pm2_5_calibrated_value", order=False - ) - - return ( - site_summarised_data, - site_descriptive_stats_calibrated_pm2_5, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_raw_pm2_5, - site_descriptive_stats_raw_pm10, - sorted_site, - ) - - -def generate_site_stats_dictionaries_for_saving_to_file( - site_calibrated_data_summary, - overall_site_calibrated, - overall_site_raw, - site_descriptive_stats_calibrated_pm2_5, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_raw_pm2_5, - site_descriptive_stats_raw_pm10, - sites_to_omit, - sorted_sites, -): - site_overall_raw_data_completeness_ = { - "title": "Table showing overall data completeness based on raw measurements", - "data": overall_site_raw, - } - - site_overall_calibrated_data_completeness_ = { - "title": "Table showing overall data completeness based on calibrated measurements", - "data": overall_site_calibrated, - } - - site_calibrated_data_summary_ = { - "title": "Table showing percentage of calibrated data in relation to raw measurements", - "data": site_calibrated_data_summary, - } - - site_calibrated_pm2_5_descriptive_stat_ = { - "title": "Table showing descriptive statistics for calibrated PM2.5 data", - "data": 
site_descriptive_stats_calibrated_pm2_5, - } - - site_calibrated_pm10_descriptive_stat_ = { - "title": "Table showing descriptive statistics for calibrated PM10 data", - "data": site_descriptive_stats_calibrated_pm10, - } - - site_raw_pm2_5_descriptive_stat_ = { - "title": "Table showing descriptive statistics for raw PM2.5 data", - "data": site_descriptive_stats_raw_pm2_5, - } - - site_raw_pm10_descriptive_stat_ = { - "title": "Table showing descriptive statistics for raw PM10 data", - "data": site_descriptive_stats_raw_pm10, - } - - sites_to_omit_heading_ = { - "content_type": "heading", - "text": "Number of sites to omit i.e. not meeting the required completennes threshold", - } - sites_to_omit_text_ = { - "content_type": "text_content", - "text": str(len(sites_to_omit)) - + " sites do not meet the required completennes threshold i.e;", - } - sites_to_omit_ = { - "content_type": "list", - "text": "Sites to omit i.e. not meeting the required completennes threshold", - "list_items": sites_to_omit, - } - - srt_ranked = { - "title": "Table showing monitoring sites average air quality ranked in descending order", - "data": sorted_sites, - } - - return ( - site_overall_raw_data_completeness_, - site_overall_calibrated_data_completeness_, - site_calibrated_data_summary_, - site_calibrated_pm2_5_descriptive_stat_, - site_calibrated_pm10_descriptive_stat_, - site_raw_pm2_5_descriptive_stat_, - site_raw_pm10_descriptive_stat_, - srt_ranked, - sites_to_omit_heading_, - sites_to_omit_text_, - sites_to_omit_, - ) - - -def get_site_data_completenness_stats(site_summarised_data): - site_calibrated_data_summary = site_summarised_data[ - [ - "site", - "calibrated_pm2_5_count", - "raw_pm2_5_count", - "calibrated_pm10_count", - "raw_pm10_count", - "percentage_of_calibrated_pm2_5", - "percentage_of_calibrated_pm10", - ] - ] - - site_calibrated_data_summary.rename( - columns={ - "site": "site name", - "calibrated_pm2_5_count": "Calibrated PM2.5 Count", - "raw_pm2_5_count": "Raw PM2.5 Count", - "calibrated_pm10_count": "Calibrated PM10 Count", - "raw_pm10_count": "Raw PM10 Count", - "percentage_of_calibrated_pm2_5": "Calibrated PM2.5(%)", - "percentage_of_calibrated_pm10": "Calibrated PM10(%)", - }, - inplace=True, - ) - - overall_site_calibrated = site_summarised_data[ - [ - "site", - "overall_percentage_of_available_calibrated_pm2_5", - "overall_percentage_of_missing_calibrated_pm2_5", - "overall_percentage_of_available_calibrated_pm10", - "overall_percentage_of_missing_calibrated_pm10", - ] - ] - - overall_site_calibrated.rename( - columns={ - "site": "site name", - "overall_percentage_of_available_calibrated_pm2_5": "Completeness(%) PM2.5", - "overall_percentage_of_missing_calibrated_pm2_5": "Missing(%) PM2.5", - "overall_percentage_of_available_calibrated_pm10": "Completeness(%) PM10", - "overall_percentage_of_missing_calibrated_pm10": "Missing(%) PM10", - }, - inplace=True, - ) - - overall_site_raw = site_summarised_data[ - [ - "site", - "overall_percentage_of_available_raw_pm2_5", - "overall_percentage_of_missing_raw_pm2_5", - "overall_percentage_of_available_raw_pm10", - "overall_percentage_of_missing_raw_pm10", - ] - ] - - overall_site_raw.rename( - columns={ - "site": "site name", - "overall_percentage_of_available_raw_pm2_5": "Completeness(%) PM2.5", - "overall_percentage_of_missing_raw_pm2_5": "Missing(%) PM2.5", - "overall_percentage_of_available_raw_pm10": "Completeness(%) PM10", - "overall_percentage_of_missing_raw_pm10": "Missing(%) PM10", - }, - inplace=True, - ) - - sites_to_omit = 
get_sites_to_omit_due_to_fewer_observations( - site_summarised_data, cutoff_count - ) - - return ( - site_calibrated_data_summary, - overall_site_calibrated, - overall_site_raw, - sites_to_omit, - ) - - -def save_monthly_and_annual_analysis_for_location(report_path, location_data): - for location_df in location_data: - location_name = location_df["site"].iat[0] - location_name_heading_ = { - "content_type": "heading", - "text": location_name + " overview", - } - location_monthly_data = get_monthly(location_df) - location_monthly_data_ = { - "content_type": "table", - "title": "Table showing " - + location_name - + " monthly averages for calibrated PM2.5 data", - "data": location_monthly_data, - } - - monthly_plot_fig, monthly_plot_mem_fig = plot_monthly( - location_monthly_data, location_name - ) - - location_monthly_plot_ = { - "content_type": "image", - "image_label": "Figure showing " - + location_name - + " monthly averages for calibrated PM2.5 data", - "image": monthly_plot_mem_fig, - } - - location_yearly_data = get_yearly(location_df) - location_yearly_data_ = { - "content_type": "table", - "title": "Table showing " - + location_name - + " 24 hour annual averages for calibrated PM2.5 data", - "data": location_yearly_data, - } - - yearly_plot_fig, yearly_plot_mem_fig = plot_yearly( - location_yearly_data, location_name - ) - - location_yearly_plot_ = { - "content_type": "image", - "image_label": "Figure showing " - + location_name - + " 24 hour annual averages for calibrated PM2.5 data", - "image": yearly_plot_mem_fig, - } - - write_city_analysis_to_word_file( - report_path, - location_name_heading=location_name_heading_, - location_monthly_data=location_monthly_data_, - location_monthly_plot_=location_monthly_plot_, - location_yearly_data=location_yearly_data_, - location_yearly_plot_=location_yearly_plot_, - ) - - -def write_report_template(file_path=None): - if file_path == None: - document = Document() - file_path = "demo.docx" - else: - document = Document(file_path) - - document.add_heading("Quarterly Air Quality Report Analysis", 0) - - p = document.add_paragraph("Analysis to be used for generating quarterly report ") - p.add_run("bold").bold = True - p.add_run(" and some ") - p.add_run("italic.").italic = True - - document.add_heading("Data Completeness Checks For all the cities", level=1) - document.add_paragraph("How completeness was computed", style="Intense Quote") - - document.add_paragraph("Expected days in the quarter is 90", style="List Bullet") - document.add_paragraph( - "Expected hourly records in the two years is expected days*24", - style="List Bullet", - ) - document.add_paragraph( - "Cutoff Count is : 0.75*expected hourly records (75%)", style="List Bullet" - ) - - document.save("demo.docx") - - -def write_city_analysis_to_word_file(file_path=None, city=None, *args_df, **kwargs): - if file_path == None: - document = Document() - file_path = "demo.docx" - else: - document = Document(file_path) - - if city != None: - document.add_heading(city, level=2) - - for dic in args_df: - content_df = dic.get("data") - title = dic.get("title") - - table_ = document.add_table( - content_df.shape[0] + 1, content_df.shape[1], style="Table Grid" - ) - - # add the header rows. 
- for j in range(content_df.shape[-1]): - table_.cell(0, j).text = content_df.columns[j] - - for i in range(content_df.shape[0]): - for j in range(content_df.shape[-1]): - table_.cell(i + 1, j).text = str(content_df.values[i][j]) - - document.add_paragraph(title) - - for key, value in kwargs.items(): - content_type = value.get("content_type") - - if content_type == "image": - image = value.get("image") - image_label = value.get("image_label") - document.add_picture(image, width=Inches(6)) - document.add_paragraph(image_label) - elif content_type == "heading": - text_content = value.get("text") - document.add_heading(text_content, level=1) - - elif content_type == "list": - list_items = value.get("list_items") - for x in list_items: - document.add_paragraph(x, style="List Bullet") - - elif content_type == "table": - content_df = value.get("data") - table_title = value.get("title") - table_ = document.add_table( - content_df.shape[0] + 1, content_df.shape[1], style="Table Grid" - ) - for j in range(content_df.shape[-1]): - table_.cell(0, j).text = content_df.columns[j] - - for i in range(content_df.shape[0]): - for j in range(content_df.shape[-1]): - table_.cell(i + 1, j).text = str(content_df.values[i][j]) - - document.add_paragraph(table_title) - - else: - text_content = value.get("text") - extra_content_value = value.get("data") - document.add_paragraph(text_content) - if extra_content_value != "": - document.add_paragraph(extra_content_value) - - document.save(file_path) - - -def generate_all_cities_analysis(data, report_path): - data.city = data.city.fillna("Missing City") - unique_cities = data["city"].unique() - for city in unique_cities: - city_data = data[data["city"] == city] - kapchorwa_data = get_city_data(city_data) - if kapchorwa_data.shape[0] > 1: - ( - kapchorwa_site_summarised_data, - site_descriptive_stats_calibrated_pm2_5, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_raw_pm2_5, - site_descriptive_stats_raw_pm10, - sorted_site, - ) = get_site_summarised_stats(kapchorwa_data, expected_hourly_records) - ( - site_calibrated_data_summary, - overall_site_calibrated, - overall_site_raw, - kapchorwa_sites_to_omit, - ) = get_site_data_completenness_stats(kapchorwa_site_summarised_data) - - ( - site_overall_raw_data_completeness_, - site_overall_calibrated_data_completeness_, - site_calibrated_data_summary_, - site_calibrated_pm2_5_descriptive_stat_, - site_calibrated_pm10_descriptive_stat_, - site_raw_pm2_5_descriptive_stat_, - site_raw_pm10_descriptive_stat_, - srt_ranked, - sites_to_omit_heading_, - sites_to_omit_text_, - sites_to_omit_, - ) = generate_site_stats_dictionaries_for_saving_to_file( - site_calibrated_data_summary, - overall_site_calibrated, - overall_site_raw, - site_descriptive_stats_calibrated_pm2_5, - site_descriptive_stats_calibrated_pm10, - site_descriptive_stats_raw_pm2_5, - site_descriptive_stats_raw_pm10, - kapchorwa_sites_to_omit, - sorted_site, - ) - write_city_analysis_to_word_file( - report_path, - city, - site_overall_raw_data_completeness_, - site_overall_calibrated_data_completeness_, - site_calibrated_data_summary_, - site_calibrated_pm2_5_descriptive_stat_, - site_calibrated_pm10_descriptive_stat_, - site_raw_pm2_5_descriptive_stat_, - site_raw_pm10_descriptive_stat_, - srt_ranked, - sites_to_omit_heading=sites_to_omit_heading_, - sites_to_omit_size=sites_to_omit_text_, - sites_to_omit=sites_to_omit_, - ) - - -def from_bigquery( - tenant, - start_date, - end_date, - frequency, - 
pollutants, - additional_columns=None, -): - if additional_columns is None: - additional_columns = [] - - decimal_places = 2 - - columns_ = [ - f"{BIGQUERY_EVENTS}.device_id AS device", - f"{BIGQUERY_SITES}.name AS site", - "FORMAT_DATETIME('%Y-%m-%d %H:%M:%S', timestamp) AS datetime", - f"{BIGQUERY_SITES}.approximate_latitude AS latitude", - f"{BIGQUERY_SITES}.approximate_longitude AS longitude", - ] - - columns = [ - f"{BIGQUERY_EVENTS}.device_id AS device", - f"{BIGQUERY_SITES}.name AS site", - "FORMAT_DATETIME('%Y-%m-%d %H:%M:%S', timestamp) AS datetime", - f"{BIGQUERY_SITES}.approximate_latitude AS latitude", - f"{BIGQUERY_SITES}.approximate_longitude AS longitude", - f"{BIGQUERY_SITES}.city AS city", - f"{BIGQUERY_SITES}.region AS region", - # f"{BIGQUERY_SITES_METADATA}.sub_county AS subcounty", - # f"{BIGQUERY_SITES_METADATA}.district AS district", - # f"{BIGQUERY_SITES_METADATA}.parish AS parish", - f"{BIGQUERY_SITES}.country AS country", - ] - columns.extend(additional_columns) - - for pollutant in pollutants: - pollutant_mapping = POLLUTANT_BIGQUERY_MAPPER.get(pollutant, []) - columns.extend( - [ - f"ROUND({mapping}, {decimal_places}) AS {mapping}" - for mapping in pollutant_mapping - ] - ) - - QUERY = ( - f"SELECT {', '.join(map(str, set(columns)))} " - f"FROM {BIGQUERY_EVENTS} " - f"JOIN {BIGQUERY_SITES} ON {BIGQUERY_SITES}.id = {BIGQUERY_EVENTS}.site_id " - f"WHERE {BIGQUERY_EVENTS}.tenant = '{tenant}' " - f"AND {BIGQUERY_EVENTS}.timestamp >= '{start_date}' " - f"AND {BIGQUERY_EVENTS}.timestamp <= '{end_date}' " - ) - - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = True - - dataframe = bigquery.Client().query(QUERY, job_config).result().to_dataframe() - dataframe.sort_values(["site", "datetime", "device"], ascending=True, inplace=True) - - return dataframe - - -def get_quarterly_data(tenant="airqo"): - start_date = str_to_date_2("2022-07-01 00:00:00") - end_date = str_to_date_2("2022-09-30 23:59:00") # previous_months_range(3) - frequency = "hourly" - pollutants = ["pm2_5", "pm10"] - - data = from_bigquery( - tenant=tenant, - start_date=start_date, - end_date=end_date, - frequency=frequency, - pollutants=pollutants, - ) - print(data.shape) - - data.to_csv("quarterly_data.csv") - - return data - - -def send_email(): - mail_content = """Hello, - This is the quarterly air quality analytics email. - We have attached the detailed analysis in a word document. 
- - Thank You -""" - # The mail addresses and password - sender_address = configuration.MAIL_SENDER_EMAILADDRESS - sender_pass = configuration.MAIL_SENDER_PASSWORD - receiver_addresses = configuration.MAIL_RECEIVER_EMAILADDRESS - recipients = receiver_addresses.split(",") - mail_subject = configuration.MAIL_SUBJECT - # Setup the MIME - message = MIMEMultipart() - message["From"] = sender_address - message["To"] = ", ".join(recipients) - message["Subject"] = mail_subject - # The subject line - # The body and the attachments for the mail - message.attach(MIMEText(mail_content, "plain")) - attach_file_name = ( - "demo.docx" ##TODO: read it from generated report saved in cloudstorage - ) - attach_file = open(attach_file_name, "rb") # Open the file as binary mode - payload = MIMEBase("application", "octate-stream") - payload.set_payload((attach_file).read()) - encoders.encode_base64(payload) # encode the attachment - print(attach_file_name) - # add payload header with filename - payload.add_header("Content-Disposition", "attachment", filename=attach_file_name) - message.attach(payload) - # Create SMTP session for sending the mail - session = smtplib.SMTP("smtp.gmail.com", 587) # use gmail with port - session.starttls() # enable security - session.login(sender_address, sender_pass) # login with mail_id and password - text = message.as_string() - session.sendmail(sender_address, recipients, text) - session.quit() - print("Mail Sent") - - -if __name__ == "__main__": - quarterly_data = get_quarterly_data() - print(quarterly_data.shape) - write_report_template() - generate_all_cities_analysis(quarterly_data, "demo.docx") - send_email() diff --git a/src/insights/jobs/reports/requirements.txt b/src/insights/jobs/reports/requirements.txt deleted file mode 100644 index 0260b3494a..0000000000 --- a/src/insights/jobs/reports/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -google-api-core -google-auth -#google-cloud-bigquery -google-cloud-bigquery[pandas]==2.34.3 -google-cloud -numpy -pandas -python-dateutil -python-dotenv -gcsfs -pyarrow -joblib -requests -docx==0.2.4 -python-docx==0.8.11 -seaborn==0.12.0 \ No newline at end of file diff --git a/src/insights/jobs/reports/utils.py b/src/insights/jobs/reports/utils.py deleted file mode 100644 index 9e89b7cd12..0000000000 --- a/src/insights/jobs/reports/utils.py +++ /dev/null @@ -1,111 +0,0 @@ -import gcsfs -import joblib -import pandas as pd -from datetime import date, datetime -from dateutil.relativedelta import relativedelta -from google.cloud import storage - - -def previous_months_range(n): - """ - Function that calculates the previous months date ranges - Args: - n (int): represents the number of previous months range e.g 3 for three months ago - """ - # end_date = date.today() - end_date = datetime.now() - start_date = end_date + relativedelta(months=-n) - - return start_date, end_date - - -def str_to_date(st): - """converts a string to datetime""" - return datetime.strptime(st, "%Y-%m-%dT%H:%M:%S.%fZ") - - -def date_to_str(date): - """converts datetime to a string""" - return datetime.strftime(date, "%Y-%m-%dT%H:%M:%S.%fZ") - - -def str_to_date_2(st): - """ - Converts a string to datetime - """ - return datetime.strptime(st, "%Y-%m-%d %H:%M:%S") - - -def date_to_str_2(date): - """ - Converts datetime to a string - """ - return datetime.strftime(date, "%Y-%m-%d %H:00:00") - - -def is_key_exist(dict, key): - """checks wether specified key is available in the specified dictionary.""" - if key in dict.keys(): - return True - else: - return False - - 
-def get_csv_file_from_gcs(project_name, bucket_name, source_blob_name): - """gets csv file from google cloud storage and returns as a pandas dataframe""" - fs = gcsfs.GCSFileSystem(project=project_name) - with fs.open(f"{bucket_name}/{source_blob_name}") as file_handle: - df = pd.read_csv(file_handle) - return df - - -def get_trained_model_from_gcs(project_name, bucket_name, source_blob_name): - fs = gcsfs.GCSFileSystem(project=project_name) - fs.ls(bucket_name) - with fs.open(bucket_name + "/" + source_blob_name, "rb") as handle: - job = joblib.load(handle) - return job - - -def upload_csv_file_to_gcs( - project_name, credential, bucket_name, source_blob_name, source_file_name -): - storage_client = storage.Client.from_service_account_json( - json_credentials_path=credential - ) - - try: - bucket = storage_client.bucket(bucket_name) - blob = bucket.blob(source_blob_name) - - new_blob = bucket.rename_blob(blob, f"{datetime.now()}-{source_blob_name}") - print("Blob {} has been renamed to {}".format(blob.name, new_blob.name)) - except: - print("Upload csv: No file to updated") - - # upload csv - blob = bucket.blob(source_blob_name) - - blob.upload_from_filename(source_file_name) - - print("File {} uploaded to {}.".format(source_file_name, source_blob_name)) - - -def upload_trained_model_to_gcs( - trained_model, project_name, bucket_name, source_blob_name -): - fs = gcsfs.GCSFileSystem(project=project_name) - - # backup previous model - try: - fs.rename( - f"{bucket_name}/{source_blob_name}", - f"{bucket_name}/{datetime.now()}-{source_blob_name}", - ) - print("Bucket: previous model is backed up") - except: - print("Bucket: No file to updated") - - # store new model - with fs.open(bucket_name + "/" + source_blob_name, "wb") as handle: - job = joblib.dump(trained_model, handle) diff --git a/src/insights/main.py b/src/insights/main.py deleted file mode 100644 index 8bc17fb25f..0000000000 --- a/src/insights/main.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Module for creating the flask app""" - -# System libraries - -# Third-Party libraries -from flasgger import Swagger -from flask import Flask, jsonify -from flask_caching import Cache -from flask_cors import CORS -from flask_excel import init_excel -from flask_restx import Api, Namespace -from marshmallow import ValidationError as MarshmallowValidationError - -# middlewares -from api.middlewares import middleware_blueprint -from api.middlewares.base_validator import ValidationError - -# Config -from config import CONFIGURATIONS, API_V2_BASE_URL - -rest_api = Api(doc=False) -rest_api_v2 = Namespace(name="v2", description="API version 2", path=API_V2_BASE_URL) -rest_api.add_namespace(rest_api_v2) -cache = Cache() - - -def initialize_blueprints(application): - """Initialize error handlers""" - - application.register_blueprint(middleware_blueprint) - - -def create_app(rest_api, config=CONFIGURATIONS): - """creates a flask app object from a config object""" - - app = Flask(__name__) - app.config.from_object(config) - - rest_api.init_app(app) - cache.init_app(app) - init_excel(app) - CORS(app) - Swagger(app) - - initialize_blueprints(app) - - import api.views - - return app - - -@rest_api.errorhandler(MarshmallowValidationError) -@middleware_blueprint.app_errorhandler(MarshmallowValidationError) -def handle_marshmallow_exception(error): - """Error handler called when a marshmallow ValidationError is raised""" - - error_message = { - "message": "An error occurred", - "status": "error", - "errors": error.messages, - } - return jsonify(error_message), 400 - - 
-@rest_api.errorhandler(ValidationError) -@middleware_blueprint.app_errorhandler(ValidationError) -def handle_exception(error): - """Error handler called when a ValidationError is raised""" - - return jsonify(error.error), 400 diff --git a/src/insights/manage.py b/src/insights/manage.py deleted file mode 100644 index 5e9073d484..0000000000 --- a/src/insights/manage.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Module Application entry point""" - -# Third-Party libraries -from flasgger import swag_from -from flask import jsonify -from decouple import config as env_config - -# Config -from main import create_app, rest_api, rest_api_v2 -from config import config - -# utils -from api.utils.pre_request import PreRequest - -from api.models.base.data_processing import air_quality_data -from api.models.base.diurnal_data_processing import air_quality_data_diurnal - -config_name = env_config("FLASK_ENV", "production") - -app = create_app(rest_api, config=config[config_name]) - - -# @app.before_request -def check_tenant_param(): - return PreRequest.check_tenant() - - -@app.route("/health") -@swag_from("/api/docs/status.yml") -def index(): - return jsonify(dict(message=f"App status - OK.")) - - -# Add a new route for air_quality_data -@app.route("/api/v2/analytics/grid/report", methods=["POST"]) -def air_quality_data_route(): - return air_quality_data() # Call the air_quality_data function - - -@app.route("/api/v2/analytics/grid/report/diurnal", methods=["POST"]) -def air_quality_data_diurnal_route(): - return air_quality_data_diurnal() # Call the air_quality_data_diurnal function - - -if __name__ == "__main__": - app.run() diff --git a/src/insights/models/__init__.py b/src/insights/models/__init__.py new file mode 100644 index 0000000000..cc681d46a6 --- /dev/null +++ b/src/insights/models/__init__.py @@ -0,0 +1,5 @@ +from .site import SiteModel +from .events import EventsModel +from .exceedance import ExceedanceModel + +__all__ = ["SiteModel", "EventsModel", "ExceedanceModel"] diff --git a/src/insights/api/models/base/__init__.py b/src/insights/models/base/__init__.py similarity index 100% rename from src/insights/api/models/base/__init__.py rename to src/insights/models/base/__init__.py diff --git a/src/insights/api/models/base/base_model.py b/src/insights/models/base/base_model.py similarity index 77% rename from src/insights/api/models/base/base_model.py rename to src/insights/models/base/base_model.py index 5c0614bd1c..cd26a17e81 100644 --- a/src/insights/api/models/base/base_model.py +++ b/src/insights/models/base/base_model.py @@ -22,10 +22,4 @@ def __init__(self, tenant, collection_name): def _connect(self): client = MongoClient(APP_CONFIG.MONGO_URI) db = client[f"{APP_CONFIG.DB_NAME}_{self.tenant}"] - - # lets hard code the db here for dev purposes - # db = client['airqo_analytics'] return db - - def __repr__(self): - return f"{self.__class__.__name__}({self.tenant}, {self.collection_name})" diff --git a/src/insights/api/models/base/data_processing.py b/src/insights/models/base/data_processing.py similarity index 96% rename from src/insights/api/models/base/data_processing.py rename to src/insights/models/base/data_processing.py index 7e03868d35..b7898895ae 100644 --- a/src/insights/api/models/base/data_processing.py +++ b/src/insights/models/base/data_processing.py @@ -1,20 +1,17 @@ -from flask import Flask, request, jsonify +from flask import jsonify from datetime import datetime import numpy as np import logging -from api.utils.pollutants.report import ( +from utils.pollutants.report import ( 
fetch_air_quality_data, query_bigquery, results_to_dataframe, PManalysis, ) -# Configure logging -logging.basicConfig(filename="report_log.log", level=logging.INFO, filemode="w") - -def air_quality_data(): - data = request.get_json() +def air_quality_data(json_data): + data = json_data grid_id = data.get("grid_id", "") start_time_str = data.get("start_time", "") end_time_str = data.get("end_time", "") diff --git a/src/insights/api/models/base/diurnal_data_processing.py b/src/insights/models/base/diurnal_data_processing.py similarity index 93% rename from src/insights/api/models/base/diurnal_data_processing.py rename to src/insights/models/base/diurnal_data_processing.py index d98af83838..acbc401708 100644 --- a/src/insights/api/models/base/diurnal_data_processing.py +++ b/src/insights/models/base/diurnal_data_processing.py @@ -1,20 +1,17 @@ -from flask import Flask, request, jsonify +from flask import request, jsonify from datetime import datetime import logging import numpy as np -from api.utils.pollutants.report import ( +from utils.pollutants.report import ( fetch_air_quality_data, query_bigquery, results_to_dataframe, PManalysis, ) -# Configure logging -logging.basicConfig(filename="report_log.log", level=logging.INFO, filemode="w") - -def air_quality_data_diurnal(): - data = request.get_json() +def air_quality_data_diurnal(json_data): + data = json_data grid_id = data.get("grid_id", "") start_time_str = data.get("start_time", "") end_time_str = data.get("end_time", "") diff --git a/src/insights/api/models/base/model_operations.py b/src/insights/models/base/model_operations.py similarity index 100% rename from src/insights/api/models/base/model_operations.py rename to src/insights/models/base/model_operations.py diff --git a/src/insights/api/models/events.py b/src/insights/models/events.py similarity index 99% rename from src/insights/api/models/events.py rename to src/insights/models/events.py index 8c993f7223..155770a16b 100644 --- a/src/insights/api/models/events.py +++ b/src/insights/models/events.py @@ -5,13 +5,16 @@ import pytz from google.cloud import bigquery -from api.models.base.base_model import BasePyMongoModel -from api.utils.dates import date_to_str -from api.utils.pollutants.pm_25 import ( +from config import CONFIGURATIONS +from models.base.base_model import BasePyMongoModel +from utils.dates import date_to_str +from utils.pollutants.pm_25 import ( BIGQUERY_FREQUENCY_MAPPER, WEATHER_FIELDS_MAPPER, ) -from main import cache, CONFIGURATIONS + + +# from app import cache class EventsModel(BasePyMongoModel): @@ -41,7 +44,7 @@ def __init__(self, tenant): super().__init__(tenant, collection_name="events") @classmethod - @cache.memoize() + # @cache.memoize() def download_from_bigquery( cls, devices, @@ -525,7 +528,7 @@ def save_devices_summary_data( job.result() @classmethod - @cache.memoize() + # #@cache.memoize() def get_devices_summary( cls, airqloud, start_date_time, end_date_time, grid: str, cohort: str ) -> list: @@ -670,7 +673,7 @@ def get_devices_summary( return dataframe.to_dict("records") @classmethod - @cache.memoize() + # @cache.memoize() def bigquery_mobile_device_measurements( cls, tenant, device_numbers: list, start_date_time, end_date_time ): @@ -725,7 +728,7 @@ def _project_pollutant_filter(pollutants): return filtered - @cache.memoize() + # @cache.memoize() def get_downloadable_events( self, sites, start_date, end_date, frequency, pollutants ): @@ -789,7 +792,7 @@ def get_downloadable_events( .exec() ) - @cache.memoize() + # @cache.memoize() def 
get_averages_by_pollutant(self, start_date, end_date, pollutant): return ( self.date_range("values.time", start_date=start_date, end_date=end_date) @@ -811,7 +814,7 @@ def get_averages_by_pollutant(self, start_date, end_date, pollutant): .exec() ) - @cache.memoize() + ##@cache.memoize() def get_averages_by_pollutant_from_bigquery(self, start_date, end_date, pollutant): if pollutant not in ["pm2_5", "pm10", "no2", "pm1"]: raise Exception("Invalid pollutant") @@ -831,7 +834,7 @@ def get_averages_by_pollutant_from_bigquery(self, start_date, end_date, pollutan dataframe["value"] = dataframe["value"].apply(lambda x: round(x, 2)) return dataframe.to_dict("records") - @cache.memoize() + ##@cache.memoize() def get_device_averages_from_bigquery( self, start_date, end_date, pollutant, devices ): @@ -854,7 +857,7 @@ def get_device_averages_from_bigquery( dataframe["value"] = dataframe["value"].apply(lambda x: round(x, 2)) return dataframe.to_dict("records") - @cache.memoize() + ##@cache.memoize() def get_device_readings_from_bigquery( self, start_date, end_date, pollutant, devices ): @@ -884,7 +887,7 @@ def get_device_readings_from_bigquery( return dataframe - @cache.memoize() + ##@cache.memoize() def get_chart_events(self, sites, start_date, end_date, pollutant, frequency): time_format_mapper = { "raw": "%Y-%m-%dT%H:%M:%S%z", @@ -942,7 +945,7 @@ def get_chart_events(self, sites, start_date, end_date, pollutant, frequency): .exec() ) - @cache.memoize() + ##@cache.memoize() def get_d3_chart_events(self, sites, start_date, end_date, pollutant, frequency): diurnal_end_date = datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S.%fZ").replace( tzinfo=pytz.utc @@ -1003,7 +1006,7 @@ def get_d3_chart_events(self, sites, start_date, end_date, pollutant, frequency) .exec() ) - @cache.memoize() + ##@cache.memoize() def get_d3_chart_events_v2( self, sites, start_date, end_date, pollutant, frequency, tenant ): diff --git a/src/insights/api/models/exceedance.py b/src/insights/models/exceedance.py similarity index 98% rename from src/insights/api/models/exceedance.py rename to src/insights/models/exceedance.py index 020f4d93e3..e7a8aec252 100644 --- a/src/insights/api/models/exceedance.py +++ b/src/insights/models/exceedance.py @@ -1,5 +1,4 @@ -from api.models.base.base_model import BasePyMongoModel -from main import cache +from models.base.base_model import BasePyMongoModel class ExceedanceModel(BasePyMongoModel): @@ -90,7 +89,7 @@ def project_by_standard(self, standard): return self.project(site_id={"$toObjectId": "$site_id"}, who=1) return self.project(site_id={"$toObjectId": "$site_id"}, aqi=1) - @cache.memoize() + # @cache.memoize() def get_exceedances(self, start_date, end_date, pollutant, standard, sites=None): if sites: return self.get_exceedances_by_sites( diff --git a/src/insights/api/models/site.py b/src/insights/models/site.py similarity index 94% rename from src/insights/api/models/site.py rename to src/insights/models/site.py index 8d2e515f42..cd64fe0657 100644 --- a/src/insights/api/models/site.py +++ b/src/insights/models/site.py @@ -1,13 +1,11 @@ -from api.models.base.base_model import BasePyMongoModel - -from main import cache +from models.base.base_model import BasePyMongoModel class SiteModel(BasePyMongoModel): def __init__(self, tenant): super().__init__(tenant, collection_name="sites") - @cache.memoize() + # @cache.memoize() def get_sites(self, sites=None): if sites: return self.get_specific_sites(sites, id_key="site_id") @@ -32,7 +30,7 @@ def get_specific_sites(self, sites, id_key="_id"): .exec() ) 
- @cache.memoize() + # @cache.memoize() def get_all_sites(self): return ( self.lookup( diff --git a/src/insights/namespaces/__init__.py b/src/insights/namespaces/__init__.py new file mode 100644 index 0000000000..703b7ca081 --- /dev/null +++ b/src/insights/namespaces/__init__.py @@ -0,0 +1,5 @@ +from .dashboard import dashboard_api +from .data import data_export_api +from .report import auto_report_api + +__all__ = ["dashboard_api", "data_export_api", "auto_report_api"] diff --git a/src/insights/api/views/dashboard.py b/src/insights/namespaces/dashboard.py similarity index 80% rename from src/insights/api/views/dashboard.py rename to src/insights/namespaces/dashboard.py index 5b09f93a3b..d0530a4ca1 100644 --- a/src/insights/api/views/dashboard.py +++ b/src/insights/namespaces/dashboard.py @@ -1,41 +1,38 @@ # Third-party libraries - import math from flasgger import swag_from from flask import request +from flask_restx import Namespace from flask_restx import Resource +from marshmallow import ValidationError -from api.models import ( +from models import ( EventsModel, SiteModel, ExceedanceModel, ) -from api.utils.data_formatters import filter_non_private_entities, Entity +from utils.data_formatters import filter_non_private_entities, Entity # Middlewares -from api.utils.http import create_response, Status -from api.utils.pollutants import ( +from utils.http import create_response, Status +from utils.pollutants import ( generate_pie_chart_data, - d3_generate_pie_chart_data, PM_COLOR_CATEGORY, set_pm25_category_background, ) -from api.utils.request_validators import validate_request_json -from main import rest_api_v2 +from utils.validators.dashboard import ( + DailyAveragesSchema, + DeviceDailyAveragesSchema, + ExceedancesSchema, +) + +dashboard_api = Namespace("dashboard", description="Dashboard API endpoints") -@rest_api_v2.route("/dashboard/chart/data") +@dashboard_api.route("/chart/data") class ChartDataResource(Resource): @swag_from("/api/docs/dashboard/customised_chart_post.yml") - @validate_request_json( - "sites|required:list", - "startDate|required:datetime", - "endDate|required:datetime", - "frequency|required:str", - "pollutant|required:str", - "chartType|required:str", - ) def post(self): tenant = request.args.get("tenant", "airqo") @@ -60,14 +57,14 @@ def post(self): chart_labels = [] for record in data: - site = record.get("site", {}) + site = record.get("site") site_name = f"{site.get('name') or site.get('description') or site.get('generated_name')}" dataset = {} sorted_values = sorted( - record.get("values", []), key=lambda item: item.get("time") + record.get("values"), key=lambda item: item.get("time") ) if chart_type.lower() == "pie": category_count = generate_pie_chart_data( @@ -79,9 +76,7 @@ def post(self): except ValueError: values = [] labels = [] - background_colors = [ - PM_COLOR_CATEGORY.get(label, "#808080") for label in labels - ] + background_colors = [PM_COLOR_CATEGORY.get(label) for label in labels] color = background_colors dataset.update( @@ -131,17 +126,9 @@ def post(self): ) -@rest_api_v2.route("/dashboard/chart/d3/data") +@dashboard_api.route("/chart/d3/data") class D3ChartDataResource(Resource): @swag_from("/api/docs/dashboard/d3_chart_data_post.yml") - @validate_request_json( - "sites|required:list", - "startDate|required:datetime", - "endDate|required:datetime", - "frequency|required:str", - "pollutant|required:str", - "chartType|required:str", - ) def post(self): tenant = request.args.get("tenant", "airqo") @@ -170,7 +157,7 @@ def post(self): ) 
-@rest_api_v2.route("/dashboard/sites") +@dashboard_api.route("/sites") class MonitoringSiteResource(Resource): @swag_from("/api/docs/dashboard/monitoring_site_get.yml") def get(self): @@ -186,18 +173,18 @@ def get(self): ) -@rest_api_v2.route("/dashboard/historical/daily-averages") +@dashboard_api.route("/historical/daily-averages") class DailyAveragesResource(Resource): @swag_from("/api/docs/dashboard/device_daily_measurements_get.yml") - @validate_request_json( - "pollutant|required:str", - "startDate|required:datetime", - "endDate|required:datetime", - "sites|optional:list", - ) def post(self): tenant = request.args.get("tenant", "airqo") - json_data = request.get_json() + try: + json_data = DailyAveragesSchema().load(dashboard_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", success=False), + Status.HTTP_400_BAD_REQUEST, + ) pollutant = json_data["pollutant"] start_date = json_data["startDate"] end_date = json_data["endDate"] @@ -217,7 +204,7 @@ def post(self): background_colors = [] for v in data: - value = v.get("value", None) + value = v.get("value") site_id = v.get("site_id", None) if not site_id or not value or math.isnan(value): @@ -250,18 +237,18 @@ def post(self): ) -@rest_api_v2.route("/dashboard/historical/daily-averages-devices") -class DailyAveragesResource2(Resource): +@dashboard_api.route("/historical/daily-averages-devices") +class DeviceDailyAveragesResource(Resource): @swag_from("/api/docs/dashboard/device_daily_measurements_get.yml") - @validate_request_json( - "pollutant|required:str", - "startDate|required:datetime", - "endDate|required:datetime", - "devices|optional:list", - ) def post(self): tenant = request.args.get("tenant", "airqo") - json_data = request.get_json() + try: + json_data = DeviceDailyAveragesSchema().load(dashboard_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", success=False), + Status.HTTP_400_BAD_REQUEST, + ) pollutant = json_data["pollutant"] start_date = json_data["startDate"] end_date = json_data["endDate"] @@ -301,20 +288,18 @@ def post(self): ) -@rest_api_v2.route("/dashboard/exceedances") +@dashboard_api.route("/exceedances") class ExceedancesResource(Resource): @swag_from("/api/docs/dashboard/exceedances_post.yml") - @validate_request_json( - "pollutant|required:str", - "standard|required:str", - "startDate|required:datetime", - "endDate|required:datetime", - "sites|optional:list", - ) def post(self): tenant = request.args.get("tenant", "airqo") - - json_data = request.get_json() + try: + json_data = ExceedancesSchema().load(dashboard_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", success=False), + Status.HTTP_400_BAD_REQUEST, + ) pollutant = json_data["pollutant"] standard = json_data["standard"] start_date = json_data["startDate"] @@ -337,16 +322,9 @@ def post(self): ) -@rest_api_v2.route("/dashboard/exceedances-devices") -class ExceedancesResource2(Resource): +@dashboard_api.route("/exceedances-devices") +class DeviceExceedancesResource(Resource): @swag_from("/api/docs/dashboard/exceedances_post.yml") - @validate_request_json( - "pollutant|required:str", - "standard|required:str", - "startDate|required:datetime", - "endDate|required:datetime", - "devices|optional:list", - ) def post(self): tenant = request.args.get("tenant", "airqo") diff --git a/src/insights/namespaces/data.py b/src/insights/namespaces/data.py new file mode 100644 index 0000000000..b5ac357f44 --- /dev/null +++ 
b/src/insights/namespaces/data.py @@ -0,0 +1,271 @@ +import traceback + +import flask_excel as excel +import pandas as pd +from flasgger import swag_from +from flask_restx import Resource, Namespace +from marshmallow import ValidationError + +import tasks +from models import ( + EventsModel, +) +from utils.data_formatters import ( + filter_non_private_entities, + Entity, + compute_airqloud_summary, +) +from utils.data_formatters import ( + format_to_aqcsv, +) +from utils.dates import str_to_date, date_to_str +from utils.http import create_response, Status +from utils.validators.data import ( + DataExportSchema, + DataSummarySchema, + BulkDataExportSchema, +) + +data_export_api = Namespace("data", description="Data export APIs", path="/") +parser = data_export_api.parser() +parser.add_argument( + "userId", + type=str, + required=False, + help="User ID", +) + + +@data_export_api.route("/data-download") +class DataExportResource(Resource): + @swag_from("/api/docs/dashboard/download_custom_data_post.yml") + def post(self): + try: + json_data = DataExportSchema().load(data_export_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", success=False), + Status.HTTP_400_BAD_REQUEST, + ) + + start_date = json_data["startDateTime"] + end_date = json_data["endDateTime"] + sites = filter_non_private_entities( + entities=json_data.get("sites", []), entity_type=Entity.SITES + ) + devices = filter_non_private_entities( + entities=json_data.get("devices", []), entity_type=Entity.DEVICES + ) + airqlouds = json_data.get("airqlouds", []) + pollutants = json_data.get("pollutants", []) + weather_fields = json_data.get("weatherFields", []) + frequency = f"{json_data.get('frequency', [])}".lower() + download_type = f"{json_data.get('downloadType', [])}".lower() + output_format = f"{json_data.get('outputFormat', [])}".lower() + + postfix = "-" if output_format == "airqo-standard" else "-aqcsv-" + + try: + data_frame = EventsModel.download_from_bigquery( + sites=sites, + devices=devices, + airqlouds=airqlouds, + start_date=start_date, + end_date=end_date, + frequency=frequency, + pollutants=pollutants, + weather_fields=weather_fields, + ) + + if data_frame.empty: + return ( + create_response("No data found", data=[]), + Status.HTTP_404_NOT_FOUND, + ) + if output_format == "aqcsv": + data_frame = format_to_aqcsv( + data=data_frame, frequency=frequency, pollutants=pollutants + ) + + if download_type == "json": + return ( + create_response( + "air-quality data download successful", + data=data_frame.to_dict(orient="records"), + ), + Status.HTTP_200_OK, + ) + + return excel.make_response_from_records( + data_frame.to_dict(orient="records"), + "csv", + file_name=f"{frequency}-air-quality{postfix}data", + ) + except Exception as ex: + print(ex) + traceback.print_exc() + return ( + create_response( + "An Error occurred while processing your request. 
Please contact support", + success=False, + ), + Status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@data_export_api.route("/bulk-data-download") +class BulkDataExportResource(Resource): + def post(self): + try: + json_data = BulkDataExportSchema().load(data_export_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", success=False), + Status.HTTP_400_BAD_REQUEST, + ) + user_id = json_data.get( + "userId", + ) + start_date = json_data["startDateTime"] + end_date = json_data["endDateTime"] + meta_data = json_data.get("meta_data", []) + sites = filter_non_private_entities( + entities=json_data.get("sites", []), entity_type=Entity.SITES + ) + devices = filter_non_private_entities( + entities=json_data.get("devices", []), entity_type=Entity.DEVICES + ) + airqlouds = json_data.get("airqlouds", []) + frequency = f"{json_data.get('frequency', [])}".lower() + export_format = f"{json_data.get('exportFormat', 'csv')}".lower() + pollutants = json_data.get("pollutants", []) + weather_fields = json_data.get("weatherFields", []) + output_format = f"{json_data.get('outputFormat', 'airqo-standard')}".lower() + try: + # NOTE: order of args must match the celery task definition lest errors occur + tasks.export_data.apply_async( + args=[ + devices, + sites, + airqlouds, + start_date, + end_date, + frequency, + pollutants, + weather_fields, + output_format, + export_format, + user_id, + ], + countdown=3, + task_id=user_id, + ) + return ( + create_response("Data request successfully received", data=None), + Status.HTTP_200_OK, + ) + except Exception as ex: + print(ex) + traceback.print_exc() + return ( + create_response( + "An Error occurred while processing your request. Please contact support", + success=False, + ), + Status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + def get(self): + try: + args = parser.parse_args() + userId = args.get("userId") + if userId is None: + return ( + create_response( + "An Error occurred while processing your request. Please contact support", + success=False, + ), + Status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + result = tasks.export_data.AsyncResult(userId) + if result.ready(): + data = str(result.result) + result.forget() + return ( + create_response("request successfully received", data=data), + Status.HTTP_200_OK, + ) + else: + return ( + create_response("Data export is still in progress", data=None), + Status.HTTP_200_OK, + ) + + except Exception as ex: + print(ex) + traceback.print_exc() + return ( + create_response( + "An Error occurred while processing your request. 
Please contact support", + success=False, + ), + Status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + +@data_export_api.route("/summary") +class DataSummaryResource(Resource): + def post(self): + try: + json_data = DataSummarySchema().load(data_export_api.payload) + except ValidationError as err: + return (create_response(f" {err.messages}", success=False),) + try: + start_date_time = str_to_date(json_data["startDateTime"]) + end_date_time = str_to_date(json_data["endDateTime"]) + airqloud = str(json_data.get("airqloud")) + cohort = str(json_data.get("cohort")) + grid = str(json_data.get("grid")) + + start_date_time = date_to_str(start_date_time, format="%Y-%m-%dT%H:00:00Z") + end_date_time = date_to_str(end_date_time, format="%Y-%m-%dT%H:00:00Z") + data = EventsModel.get_devices_summary( + airqloud=airqloud, + start_date_time=start_date_time, + end_date_time=end_date_time, + grid=grid, + cohort=cohort, + ) + + summary = compute_airqloud_summary( + data=pd.DataFrame(data), + start_date_time=start_date_time, + end_date_time=end_date_time, + ) + + if len(summary) == 0: + return ( + create_response( + f"No data found for grid {grid} from {start_date_time} to {end_date_time}", + data={}, + success=False, + ), + Status.HTTP_200_OK, + ) + + return ( + create_response("successful", data=summary), + Status.HTTP_200_OK, + ) + + except Exception as ex: + print(ex) + traceback.print_exc() + return ( + create_response( + "An Error occurred while processing your request. Please contact support", + data={}, + success=False, + ), + Status.HTTP_500_INTERNAL_SERVER_ERROR, + ) diff --git a/src/insights/namespaces/report.py b/src/insights/namespaces/report.py new file mode 100644 index 0000000000..a1b7657e3e --- /dev/null +++ b/src/insights/namespaces/report.py @@ -0,0 +1,38 @@ +from flasgger import swag_from +from flask_restx import Namespace, Resource +from marshmallow import ValidationError + +from models.base.data_processing import air_quality_data +from models.base.diurnal_data_processing import air_quality_data_diurnal +from utils.http import create_response, Status +from utils.validators.report import AutoReportSchema + +auto_report_api = Namespace(name="auto-report", description="Auto Report API", path="/") + + +@auto_report_api.route("/grid/report") +class GridReportResource(Resource): + @swag_from("/api/docs/report/generate_grid_report_post.yml") + def post(self): + try: + json_data = AutoReportSchema().load(auto_report_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", data=None, success=False), + Status.HTTP_400_BAD_REQUEST, + ) + return air_quality_data(json_data) + + +@auto_report_api.route("grid/report/diurnal") +class DiurnalReportResource(Resource): + @swag_from("/api/docs/report/generate_diurnal_report_post.yml") + def post(self): + try: + json_data = AutoReportSchema().load(auto_report_api.payload) + except ValidationError as err: + return ( + create_response(f" {err.messages}", data=None, success=False), + Status.HTTP_400_BAD_REQUEST, + ) + return air_quality_data_diurnal(json_data) diff --git a/src/insights/requirements.txt b/src/insights/requirements.txt index e3f5c21b85..a7ad722afd 100644 --- a/src/insights/requirements.txt +++ b/src/insights/requirements.txt @@ -1,27 +1,22 @@ -flask <= 2.0.3 -flask_excel <= 0.0.7 -flask-restx <= 0.5.1 -flask-cors <= 3.0.10 -Flask-Caching <= 2.0.0 -flasgger <= 0.9.5 -Werkzeug <= 2.0.3 -pandas -pymongo~=4.5.0 -pytz~=2023.3.post1 +flask <= 3.0.3 +flask-restx <= 1.3.0 +flask-cors~=4.0.1 +Flask-Caching <= 2.3.0 
+Flask-Excel ~=0.0.7 +flasgger ~= 0.9.7.1 +pandas~=2.2.2 +pymongo~=4.7.2 +pytz~=2024.1 flask_pymongo <= 2.3.0 -python-decouple -python-dotenv -marshmallow -google-cloud-bigquery[pandas] -redis -python-decouple -celery -google-cloud-storage -gunicorn - -# Ports for stable python 3 functionality -dataclasses~=0.6 -pytest~=7.4.2 -numpy -python-dateutil~=2.8.2 -timezonefinder \ No newline at end of file +python-dotenv~=1.0.1 +python-decouple~=3.8 +marshmallow~=3.21.2 +google-cloud-bigquery~=3.22.0 +redis~=5.0.4 +celery~=5.4.0 +google-cloud-storage~=2.16.0 +gunicorn~=22.0.0 +pytest~=8.2.0 +numpy~=1.26.4 +timezonefinder~=6.5.0 +db-dtypes~=1.2.0 \ No newline at end of file diff --git a/src/insights/tasks.py b/src/insights/tasks.py new file mode 100644 index 0000000000..0ecd02eb2e --- /dev/null +++ b/src/insights/tasks.py @@ -0,0 +1,66 @@ +from datetime import datetime + +from celery import shared_task +from celery.utils.log import get_task_logger +from google.cloud import storage + +from models import EventsModel +from utils.data_formatters import format_to_aqcsv + +# TODO: Look into using flower to monitor tasks +celery_logger = get_task_logger(__name__) + + +@shared_task(bind=True, name="tasks.export_data", ignore_result=False) +def export_data( + self, + devices, + sites, + airqlouds, + start_date, + end_date, + frequency, + pollutants, + weather_fields, + output_format, + export_format, + user_id, +): + try: + celery_logger.info("Exporting data") + dataframe = EventsModel.download_from_bigquery( + devices=devices, + sites=sites, + airqlouds=airqlouds, + start_date=start_date, + end_date=end_date, + frequency=frequency, + pollutants=pollutants, + weather_fields=weather_fields, + ) + if output_format == "aqcsv": + dataframe = format_to_aqcsv( + dataframe, + frequency=frequency, + pollutants=pollutants, + ) + + client = storage.Client() + bucket = client.bucket(bucket_name="data_export_datasets") + filename = f'{user_id}_{datetime.now().strftime("%Y%m%d%H%M%S")}' + blob = bucket.blob(filename) + if export_format == "json": + blob.upload_from_string( + dataframe.to_json(orient="records"), "application/json" + ) + else: + blob.upload_from_string(dataframe.to_csv(index=False), "text/csv") + + # TODO: Some security concerns here. Buckets are publicly readable, but not private. 
Needs to be reviewed + file_url = blob.public_url + + celery_logger.info("Data export completed successfully") + return file_url + + except Exception as e: + celery_logger.error(f"Error while exporting data: {e}") diff --git a/src/insights/api/tests/conftest.py b/src/insights/tests/conftest.py similarity index 100% rename from src/insights/api/tests/conftest.py rename to src/insights/tests/conftest.py diff --git a/src/insights/api/tests/test_data_formatters.py b/src/insights/tests/test_data_formatters.py similarity index 97% rename from src/insights/api/tests/test_data_formatters.py rename to src/insights/tests/test_data_formatters.py index 2a47f8925a..4c422a2dc8 100644 --- a/src/insights/api/tests/test_data_formatters.py +++ b/src/insights/tests/test_data_formatters.py @@ -3,7 +3,7 @@ import pytest -from api.utils.data_formatters import format_to_aqcsv +from utils.data_formatters import format_to_aqcsv from conftest import mock_dataframe, mock_aqcsv_globals diff --git a/src/insights/api/utils/__init__.py b/src/insights/utils/__init__.py similarity index 100% rename from src/insights/api/utils/__init__.py rename to src/insights/utils/__init__.py diff --git a/src/insights/api/utils/case_converters.py b/src/insights/utils/case_converters.py similarity index 100% rename from src/insights/api/utils/case_converters.py rename to src/insights/utils/case_converters.py diff --git a/src/insights/api/utils/data_formatters.py b/src/insights/utils/data_formatters.py similarity index 97% rename from src/insights/api/utils/data_formatters.py rename to src/insights/utils/data_formatters.py index 8386df432e..013aa80d2c 100644 --- a/src/insights/api/utils/data_formatters.py +++ b/src/insights/utils/data_formatters.py @@ -4,8 +4,8 @@ import pandas as pd import requests -from api.utils.dates import str_to_aqcsv_date_format -from api.utils.pollutants.pm_25 import ( +from utils.dates import str_to_aqcsv_date_format +from utils.pollutants.pm_25 import ( AQCSV_PARAMETER_MAPPER, FREQUENCY_MAPPER, AQCSV_UNIT_MAPPER, @@ -194,14 +194,13 @@ def compute_airqloud_summary( def format_to_aqcsv( - data: list, pollutants: list, frequency: str -) -> list[Any] | list[dict]: + data: pd.DataFrame, pollutants: list, frequency: str +) -> pd.DataFrame: # Compulsory fields : site, datetime, parameter, duration, value, unit, qc, poc, data_status, # Optional fields : lat, lon, pollutant_mappers = BIGQUERY_FREQUENCY_MAPPER.get(frequency) - - dataframe = pd.DataFrame(data) + dataframe = data.copy() if dataframe.empty: return [] dataframe.rename( @@ -257,7 +256,7 @@ def format_to_aqcsv( inplace=True, ) - return dataframe.to_dict("records") + return dataframe def tenant_to_str(tenant: str) -> str: diff --git a/src/insights/api/utils/dates.py b/src/insights/utils/dates.py similarity index 100% rename from src/insights/api/utils/dates.py rename to src/insights/utils/dates.py diff --git a/src/insights/api/utils/exceptions.py b/src/insights/utils/exceptions.py similarity index 100% rename from src/insights/api/utils/exceptions.py rename to src/insights/utils/exceptions.py diff --git a/src/insights/api/utils/http.py b/src/insights/utils/http.py similarity index 100% rename from src/insights/api/utils/http.py rename to src/insights/utils/http.py diff --git a/src/insights/api/utils/messages.py b/src/insights/utils/messages.py similarity index 100% rename from src/insights/api/utils/messages.py rename to src/insights/utils/messages.py diff --git a/src/insights/api/utils/pollutants/__init__.py b/src/insights/utils/pollutants/__init__.py 
similarity index 100% rename from src/insights/api/utils/pollutants/__init__.py rename to src/insights/utils/pollutants/__init__.py diff --git a/src/insights/api/utils/pollutants/charts.py b/src/insights/utils/pollutants/charts.py similarity index 100% rename from src/insights/api/utils/pollutants/charts.py rename to src/insights/utils/pollutants/charts.py diff --git a/src/insights/api/utils/pollutants/date.py b/src/insights/utils/pollutants/date.py similarity index 91% rename from src/insights/api/utils/pollutants/date.py rename to src/insights/utils/pollutants/date.py index ab4af79296..edea8ecad7 100644 --- a/src/insights/api/utils/pollutants/date.py +++ b/src/insights/utils/pollutants/date.py @@ -1,6 +1,6 @@ from datetime import datetime -from api.utils.dates import str_to_date +from utils.dates import str_to_date FREQUENCY_FORMAT_MAPPER = { "monthly": "%B %Y", diff --git a/src/insights/api/utils/pollutants/pm_25.py b/src/insights/utils/pollutants/pm_25.py similarity index 100% rename from src/insights/api/utils/pollutants/pm_25.py rename to src/insights/utils/pollutants/pm_25.py diff --git a/src/insights/api/utils/pollutants/report.py b/src/insights/utils/pollutants/report.py similarity index 100% rename from src/insights/api/utils/pollutants/report.py rename to src/insights/utils/pollutants/report.py diff --git a/src/insights/api/utils/pollutants/units.py b/src/insights/utils/pollutants/units.py similarity index 100% rename from src/insights/api/utils/pollutants/units.py rename to src/insights/utils/pollutants/units.py diff --git a/src/insights/api/utils/pre_request.py b/src/insights/utils/pre_request.py similarity index 100% rename from src/insights/api/utils/pre_request.py rename to src/insights/utils/pre_request.py diff --git a/src/insights/jobs/__init__.py b/src/insights/utils/validators/__init__.py similarity index 100% rename from src/insights/jobs/__init__.py rename to src/insights/utils/validators/__init__.py diff --git a/src/insights/utils/validators/dashboard.py b/src/insights/utils/validators/dashboard.py new file mode 100644 index 0000000000..0e2cf98fbf --- /dev/null +++ b/src/insights/utils/validators/dashboard.py @@ -0,0 +1,25 @@ +from marshmallow import Schema, fields + + +class DailyAveragesSchema(Schema): + startDate = fields.DateTime(required=True) + endDate = fields.DateTime(required=True) + pollutant = fields.String(required=True) + sites = fields.List(fields.String()) + + +class DeviceDailyAveragesSchema(DailyAveragesSchema): + devices = fields.List(fields.String(), required=True) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.sites = None + + +class ExceedancesSchema(DailyAveragesSchema): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class DeviceExceedancesSchema(ExceedancesSchema): + devices = fields.List(fields.String(), required=True) diff --git a/src/insights/utils/validators/data.py b/src/insights/utils/validators/data.py new file mode 100644 index 0000000000..03f7f51713 --- /dev/null +++ b/src/insights/utils/validators/data.py @@ -0,0 +1,61 @@ +from marshmallow import ( + Schema, + fields, + validate, + validates_schema, + ValidationError, + post_load, +) + + +class DataExportSchema(Schema): + startDateTime = fields.DateTime(required=True) + endDateTime = fields.DateTime(required=True) + frequency = fields.String(validate=validate.OneOf(["hourly", "daily", "raw"])) + downloadType = fields.String(validate=validate.OneOf(["csv", "json"])) + outputFormat = 
fields.String(validate=validate.OneOf(["airqo-standard", "aqcsv"])) + pollutants = fields.List( + fields.String(validate=validate.OneOf(["pm2_5", "pm10", "no2"])), + validate=validate.Length(min=1), + ) + weatherFields = fields.List( + fields.String( + validate=validate.OneOf(["temperature", "humidity", "wind_speed"]) + ), + ) + sites = fields.List(fields.String()) + devices = fields.List(fields.String()) + airqlouds = fields.List(fields.String()) + + @validates_schema + def check_exclusive_fields_present(self, data, **kwargs): + exclusive_fields = ["sites", "devices", "airqlouds"] + count = sum( + bool(data.get(field)) + for field in exclusive_fields + if data.get(field) is not None + ) + + if count != 1: + raise ValidationError( + "Ensure to specify either a list of airqlouds, sites or devices only." + ) + + +class BulkDataExportSchema(DataExportSchema): + userId = fields.String(required=True) + metadata = fields.Dict() + exportFormat = fields.String() + + @post_load + def set_export_format(self, data, **kwargs): + data["exportFormat"] = data.get("outputFormat", None) + return data + + +class DataSummarySchema(Schema): + startDateTime = fields.DateTime(required=True) + endDateTime = fields.DateTime(required=True) + airqloud = fields.String() + cohort = fields.String() + grid = fields.String() diff --git a/src/insights/utils/validators/report.py b/src/insights/utils/validators/report.py new file mode 100644 index 0000000000..d8eb932f2d --- /dev/null +++ b/src/insights/utils/validators/report.py @@ -0,0 +1,7 @@ +from marshmallow import Schema, fields + + +class AutoReportSchema(Schema): + startTime = fields.DateTime(required=True) + endTime = fields.DateTime(required=True) + gridId = fields.String() diff --git a/src/predict/README.md b/src/predict/README.md index d8820b0288..beb7957796 100644 --- a/src/predict/README.md +++ b/src/predict/README.md @@ -1,4 +1,4 @@ -# Predict Microservice Guide +# Predict Microservice Guide. This repository contains code for the predict microservice, and contains 3 main directories * API - code for predict API * Jobs - This contains code for three machine learning models `forecast model training`, `forecasting job`