Skip to content

Commit

Permalink
Merge pull request #5 from aitormagan/feature/vaccines
Browse files Browse the repository at this point in the history
Feature/vaccines
  • Loading branch information
aitormagan authored Jan 12, 2021
2 parents f179aa2 + a55450b commit 57d6db6
Show file tree
Hide file tree
Showing 18 changed files with 519 additions and 108 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with nosetests
run: |
nosetests --with-coverage --cover-xml --cover-inclusive --cover-package=helpers,main_daily,main_weekly
nosetests --with-coverage --cover-xml --cover-inclusive --cover-package=helpers,main_daily,main_weekly,main_vaccination
- name: Coveralls
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
coveralls
coveralls --service=github
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
[![Build Status](https://github.com/aitormagan/covid19spainbot/workflows/Python%20Tests/badge.svg)](https://github.com/aitormagan/covid19spainbot/actions)
[![Coverage Status](https://coveralls.io/repos/github/aitormagan/covid19spainbot/badge.svg?branch=master)](https://coveralls.io/github/aitormagan/covid19spainbot?branch=master)

Script que publica en Twitter los nuevos PCR+ y fallecimientos provocados por el SARS-CoV-2 en España y disgregado por
las diferentes comunidades autónomas.
Script que publica en Twitter las siguientes estadísticas relacionadas con el SARS-CoV-2:
* PCR+ y PCR+ en las últimas 24 horas
* Fallecimientos
* IA 14 días
* % Hospitalizados
* % UCI

A diferencia de los datos del ministerio, que únicamente da las PCR+ que se ejecutaron y obtuvieron resultado el día
anterior, este script comprueba la diferencia de datos entre el reporte del día actual y el anterior, para notificar el
Expand All @@ -17,16 +21,18 @@ Links:

## Ejecución

El repositorio cuenta con dos scripts:
El repositorio cuenta con tres scripts:

* `main_daily`: debe lanzarse de lunes a viernes en intervalos de 5 minutos. Comprueba si se ha publicado el nuevo
informe y en caso positivo actualiza la BBDD y publica los tweets.
informe de casos/fallecimientos/hospitalizaciones y en caso positivo actualiza la BBDD y publica los tweets.
* `main_vaccination`: debe lanzarse de lunes a viernes en intervalos de 5 minutos. Comprueba si se ha publicado el nuevo
informe de vacunación y en caso positivo actualiza la BBDD y publica los tweets.
* `main_weekly`: debe lanzarse una única vez los domingos para publicar las estadísticas semanales.

Para planificar la ejecución de los scripts puedes hacer uso de `cron`. En concreto, estas son las expresiones que
se están usando para cada uno de los scripts:

* `main_daily`: `*/5 16-21 * * 1-5` (cada 5 minutos de 16 a 21h de lunes a viernes)
* `main_daily` y `main_vaccination`: `*/5 16-21 * * 1-5` (cada 5 minutos de 16 a 21.55h de lunes a viernes)
* `main_weekly`: `0 18 * * 0` (los domingos a las 18.00h)

Ten en cuenta que deben definirse ciertas variables de entorno para lanzar los scripts:
Expand Down
2 changes: 1 addition & 1 deletion constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date

GRAPH_IMAGE_PATH = "render/d-solo/HukfaHZgk/covid19?orgId=1&panelId=2&width=1000&height=500&tz=Europe%2FMadrid"
DAYS_WITHOUT_REPORT = [date(2020, 12, 8), date(2020, 12, 25), date(2021, 1, 1)]
DAYS_WITHOUT_REPORT = [date(2020, 12, 8), date(2020, 12, 25), date(2021, 1, 1), date(2021, 1, 6)]
13 changes: 10 additions & 3 deletions helpers/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class Measurement(Enum):
ACCUMULATED_INCIDENCE = "accumulated_incidence"
PERCENTAGE_ADMITTED = "percentage_admitted"
PERCENTAGE_ICU = "percentage_icu"
VACCINATIONS = "vaccinations"


class Influx:
Expand Down Expand Up @@ -84,6 +85,7 @@ def get_all_stats_group_by_week(self, day):
accumulated_incidence = self.get_stat_group_by_day(Measurement.ACCUMULATED_INCIDENCE, week_friday)
percentage_admitted = self.get_stat_group_by_day(Measurement.PERCENTAGE_ADMITTED, week_friday)
percentage_icu = self.get_stat_group_by_day(Measurement.PERCENTAGE_ICU, week_friday)
vaccinations = self.get_stat_group_by_week(Measurement.VACCINATIONS, day)

return self._pack_elements(**{
Measurement.PCRS.value: pcrs,
Expand All @@ -93,7 +95,8 @@ def get_all_stats_group_by_week(self, day):
Measurement.ICU_PEOPLE.value: icu,
Measurement.ACCUMULATED_INCIDENCE.value: accumulated_incidence,
Measurement.PERCENTAGE_ICU.value: percentage_icu,
Measurement.PERCENTAGE_ADMITTED.value: percentage_admitted
Measurement.PERCENTAGE_ADMITTED.value: percentage_admitted,
Measurement.VACCINATIONS.value: vaccinations
})

def get_all_stats_group_by_day(self, day):
Expand All @@ -105,6 +108,7 @@ def get_all_stats_group_by_day(self, day):
accumulated_incidence = self.get_stat_group_by_day(Measurement.ACCUMULATED_INCIDENCE, day)
percentage_admitted = self.get_stat_group_by_day(Measurement.PERCENTAGE_ADMITTED, day)
percentage_icu = self.get_stat_group_by_day(Measurement.PERCENTAGE_ICU, day)
vaccinations = self.get_stat_group_by_day(Measurement.VACCINATIONS, day)

return self._pack_elements(**{
Measurement.PCRS.value: pcrs,
Expand All @@ -114,16 +118,19 @@ def get_all_stats_group_by_day(self, day):
Measurement.ICU_PEOPLE.value: icu,
Measurement.ACCUMULATED_INCIDENCE.value: accumulated_incidence,
Measurement.PERCENTAGE_ADMITTED.value: percentage_admitted,
Measurement.PERCENTAGE_ICU.value: percentage_icu
Measurement.PERCENTAGE_ICU.value: percentage_icu,
Measurement.VACCINATIONS.value: vaccinations
})

def get_all_stats_accumulated_until_day(self, day):
pcrs = self.get_stat_accumulated_until_day(Measurement.PCRS, day)
deaths = self.get_stat_accumulated_until_day(Measurement.DEATHS, day)
vaccinations = self.get_stat_accumulated_until_day(Measurement.VACCINATIONS, day)

return self._pack_elements(**{
Measurement.PCRS.value: pcrs,
Measurement.DEATHS.value: deaths
Measurement.DEATHS.value: deaths,
Measurement.VACCINATIONS.value: vaccinations
})

@staticmethod
Expand Down
56 changes: 39 additions & 17 deletions helpers/ministry_report.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from enum import Enum
from datetime import datetime
from constants import DAYS_WITHOUT_REPORT
import math
import tabula
from abc import ABC, abstractmethod


class SpainCovid19MinistryReport:

PDF_URL_FORMAT = "https://www.mscbs.gob.es/en/profesionales/saludPublica/ccayes/alertasActual/nCov-China/" \
"documentos/Actualizacion_{0}_COVID-19.pdf"
class GenericMinistryReport(ABC):

def __init__(self, date, page, area=None):
self._date = date
Expand All @@ -19,17 +18,44 @@ def __init__(self, date, page, area=None):
def data_frame(self):
if self._data_frame is None:
col2str = {'dtype': str}
data_frames = tabula.read_pdf(self.PDF_URL_FORMAT.format(self.get_pdf_id_for_date(self._date)),
pages=str(self._page), area=self._area, pandas_options=col2str)
data_frames = tabula.read_pdf(self._get_url(), pages=str(self._page), area=self._area,
pandas_options=col2str)
self._data_frame = list(filter(lambda x: len(x) >= 19, data_frames))[0]

for column in self._data_frame:
self._data_frame[column.replace('*', '').strip()] = self._data_frame.pop(column)

return self._data_frame

@abstractmethod
def _get_url(self):
pass

def get_column_data(self, column, part=0, cast=int):
    """Read one value per territory from a column of the parsed report table.

    The first row whose first-column text starts with 'Andalucía' is taken as
    the start of the data; that row and the 18 following ones are read.

    :param column: positional index of the column holding the values.
    :param part: index of the space-separated token to take from each cell.
    :param cast: callable applied to the cleaned token (``int`` by default).
    :return: dict mapping the cleaned territory name to the converted value.
    """
    frame = self.data_frame
    name_column = frame.columns[0]

    names_as_text = frame[name_column].astype(str)
    starts_block = names_as_text.str.startswith('Andalucía', na=False)
    first_row = names_as_text.loc[starts_block].index[0]

    # Footnote marks ('*', parentheses) are dropped from the territory names.
    strip_marks = str.maketrans('', '', '*()')
    # Spanish number format: '.' groups thousands, ',' is the decimal mark,
    # a lone '-' stands for zero and '%' is just a unit suffix.
    to_plain_number = str.maketrans({'.': None, '-': '0', ',': '.', '%': None})

    values_by_ccaa = {}
    for row in range(first_row, first_row + 19):
        name = frame[name_column][row].translate(strip_marks).replace('Leon', 'León').strip()
        token = frame[frame.columns[column]][row].split(' ')[part]
        values_by_ccaa[name] = cast(token.translate(to_plain_number))

    return values_by_ccaa


class SpainCovid19MinistryReport(GenericMinistryReport):

PDF_URL_FORMAT = "https://www.mscbs.gob.es/en/profesionales/saludPublica/ccayes/alertasActual/nCov-China/" \
"documentos/Actualizacion_{0}_COVID-19.pdf"

def _get_url(self):
return self.PDF_URL_FORMAT.format(self.get_cases_pdf_id_for_date(self._date))

@staticmethod
def get_pdf_id_for_date(date):
def get_cases_pdf_id_for_date(date):
# 14/5/2020 -> id: 105
# Starting on 4/7/2020, Spanish Public Health Ministry does not publish reports at weekends.
reference_date = datetime(2020, 5, 14)
Expand All @@ -44,16 +70,12 @@ def get_pdf_id_for_date(date):

return pdf_id

def get_column_data(self, column, part=0, cast=int):
first_column = self.data_frame.columns[0]
ccaas_column = self.data_frame[first_column].astype(str)
first_ccaa_position = ccaas_column.loc[ccaas_column.str.startswith('Andalucía', na=False)].index[0]

cases = {}
for i in range(first_ccaa_position, first_ccaa_position + 19):
ccaa = self.data_frame[first_column][i].replace('*', '')
value = self.data_frame[self.data_frame.columns[column]][i].split(' ')[part].replace('.', '').replace('-', '0').replace(',', '.').replace('%', '')
class VaccinesMinistryReport(GenericMinistryReport):

cases[ccaa] = cast(value)
VACCINES_URL_FORMAT = "https://www.mscbs.gob.es/profesionales/saludPublica/ccayes/alertasActual/nCov/documentos/" \
"Informe_GIV_comunicacion_{0}.pdf"

return cases
def _get_url(self):
date_str = self._date.strftime("%Y%m%d")
return self.VACCINES_URL_FORMAT.format(date_str)
38 changes: 29 additions & 9 deletions helpers/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,28 @@
from helpers.spain_geography import CCAA_POPULATION, CCAA_ADMITTED_BEDS, CCAA_ICU_BEDS


def get_vaccination_report(accumulated_data, today_data):
    """Build the lines of the vaccination report.

    :param accumulated_data: dict of territory name -> accumulated figure.
    :param today_data: dict of territory name -> figure for the day; it must
        contain every key of ``accumulated_data``.
    :return: list of strings: one line per territory, a blank line, the
        nationwide line (sums of both dicts), a blank line and a footnote.
    """
    per_ccaa = [
        get_vaccination_sentence(ccaa, accumulated, today_data[ccaa])
        for ccaa, accumulated in accumulated_data.items()
    ]
    nationwide = get_vaccination_sentence("🇪🇸 España", sum(accumulated_data.values()),
                                          sum(today_data.values()))
    footer = ["", nationwide, "", "* Porcentajes sobre población total de CCAA"]
    return per_ccaa + footer


def get_vaccination_sentence(territorial_unit, accumulated, today_total):
    """Format one report line for a territory.

    The percentage is ``accumulated`` over the territory's population. Names
    missing from ``CCAA_POPULATION`` (such as the nationwide entry) are
    measured against the sum of all populations in that table.

    :return: a line shaped like "- <name>: <accumulated> (<percentage>%) 🔺<today>".
    """
    if territorial_unit in CCAA_POPULATION:
        population = CCAA_POPULATION[territorial_unit]
    else:
        population = sum(CCAA_POPULATION.values())

    percentage_population = accumulated / population * 100

    accumulated_text = _format_number(accumulated)
    percentage_text = _format_number(percentage_population)
    today_text = _format_number(today_total)
    return "- {0}: {1} ({2}%) 🔺{3}".format(territorial_unit, accumulated_text, percentage_text, today_text)


def get_report_by_ccaa(date_in_header, ccaas_today, ccaas_yesterday, ccaas_accumulated_today):
tweets = []
for ccaa in sorted(ccaas_today.keys()):
Expand Down Expand Up @@ -66,21 +88,19 @@ def get_territorial_unit_report(territorial_unit, header_date, today_data, yeste
sentences = list()
sentences.append(f"{territorial_unit} - {header_date}:")
sentences.append("")
sentences.append(get_report_sentence("💉 PCRs/AGs", today_data.get(Measurement.PCRS),
yesterday_data.get(Measurement.PCRS),
sentences.append(get_report_sentence("🧪 PCRs", today_data.get(Measurement.PCRS), None,
accumulated_today.get(Measurement.PCRS)))

if Measurement.PCRS_LAST_24H in today_data:
sentences.append(get_report_sentence("💉 PCRs/AGs 24h", today_data.get(Measurement.PCRS_LAST_24H),
sentences.append(get_report_sentence("🧪 PCRs 24h", today_data.get(Measurement.PCRS_LAST_24H),
yesterday_data.get(Measurement.PCRS_LAST_24H)))

sentences.append(get_report_sentence_with_unit("💥 IA 14 días",
today_data.get(Measurement.ACCUMULATED_INCIDENCE),
yesterday_data.get(Measurement.ACCUMULATED_INCIDENCE),
"/100.000 hab."))
sentences.append(get_report_sentence_with_unit("💥 IA",
today_data.get(Measurement.ACCUMULATED_INCIDENCE),
yesterday_data.get(Measurement.ACCUMULATED_INCIDENCE),
"/100.000 hab."))
sentences.append("")
sentences.append(get_report_sentence("😢 Muertes", today_data.get(Measurement.DEATHS),
yesterday_data.get(Measurement.DEATHS),
sentences.append(get_report_sentence("😢 Muertes", today_data.get(Measurement.DEATHS), None,
accumulated_today.get(Measurement.DEATHS)))
sentences.append("")
sentences.append(get_report_sentence_with_unit("🚑 Hospitalizados", today_data.get(Measurement.PERCENTAGE_ADMITTED),
Expand Down
76 changes: 38 additions & 38 deletions helpers/spain_geography.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,47 +21,47 @@
}

CCAA_ADMITTED_BEDS = {
'Andalucía': 15479,
'Aragón': 4146,
'Asturias': 3251,
'Baleares': 3033,
'Canarias': 4900,
'Cantabria': 1554,
'Castilla La Mancha': 4783,
'Castilla y León': 6701,
'Cataluña': 25221,
'Ceuta': 185,
'C. Valenciana': 11413,
'Extremadura': 3143,
'Galicia': 8255,
'Madrid': 16006,
'Melilla': 178,
'Murcia': 3703,
'Navarra': 2147,
'País Vasco': 4928,
'La Rioja': 859,
"Andalucía": 17112,
"Aragón": 4206,
"Asturias": 3492,
"Baleares": 3353,
"Canarias": 5349,
"Cantabria": 1609,
"Castilla La Mancha": 4817,
"Castilla y León": 6703,
"Cataluña": 24629,
"Ceuta": 200,
"C. Valenciana": 11644,
"Extremadura": 3188,
"Galicia": 8298,
"Madrid": 15818,
"Melilla": 179,
"Murcia": 3952,
"Navarra": 1881,
"País Vasco": 4708,
"La Rioja": 809,
}

CCAA_ICU_BEDS = {
'Andalucía': 1437,
'Aragón': 208,
'Asturias': 260,
'Baleares': 283,
'Canarias': 417,
'Cantabria': 115,
'Castilla La Mancha': 387,
'Castilla y León': 464,
'Cataluña': 1274,
'Ceuta': 16,
'C. Valenciana': 1033,
'Extremadura': 222,
'Galicia': 715,
'Madrid': 1265,
'Melilla': 13,
'Murcia': 525,
'Navarra': 133,
'País Vasco': 314,
'La Rioja': 59,
"Andalucía": 1688,
"Aragón": 233,
"Asturias": 322,
"Baleares": 305,
"Canarias": 442,
"Cantabria": 122,
"Castilla La Mancha": 377,
"Castilla y León": 513,
"Cataluña": 1358,
"Ceuta": 17,
"C. Valenciana": 1073,
"Extremadura": 215,
"Galicia": 729,
"Madrid": 1132,
"Melilla": 13,
"Murcia": 465,
"Navarra": 125,
"País Vasco": 412,
"La Rioja": 59,
}


Expand Down
31 changes: 31 additions & 0 deletions helpers/twitter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
from tempfile import NamedTemporaryFile
import tweepy
import requests
Expand Down Expand Up @@ -55,3 +56,33 @@ def _download_file(media_url, file):
file.flush()
else:
raise MediaNotAccessibleError("File could not be downloaded")

def publish_sentences_in_tweets(self, sentences, header=None):
    """Split the sentences into tweets (see ``_split_tweets``) and publish them.

    :param sentences: lines of text to publish, in order.
    :param header: optional title forwarded to ``_split_tweets``.
    """
    self.publish_tweets(self._split_tweets(sentences, header))

def _split_tweets(self, sentences, header=None):
tweets = []

header_format = header + " ({0}/{1}):\n\n" if header else ""
# We assume that the total amount of tweets will be 9 or less...
header_length = self._get_tweet_length(header_format.format(0, 0))

current_tweet = ""
for sentence in sentences:
# Twitter counts emoji as double characters...
if self._get_tweet_length(current_tweet) + self._get_tweet_length(sentence) + header_length > 280:
tweets.append(current_tweet.strip("\n"))
current_tweet = ""

current_tweet += sentence + "\n"

tweets.append(current_tweet.strip("\n"))
tweets = list(filter(lambda x: x, tweets))

return list(map(lambda x: header_format.format(x + 1, len(tweets)) + tweets[x], range(0, len(tweets))))

@staticmethod
def _get_tweet_length(sentence):
emoji_regex = re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE)
return len(sentence) + len(emoji_regex.findall(sentence))
Loading

0 comments on commit 57d6db6

Please sign in to comment.