Skip to content

Commit

Permalink
Merge pull request #3711 from esl/graphql-gdpr-resolver
Browse files Browse the repository at this point in the history
Adding gdpr resolver
  • Loading branch information
JanuszJakubiec authored Jul 20, 2022
2 parents 0fbad9e + 70f14ca commit 8c29117
Show file tree
Hide file tree
Showing 11 changed files with 264 additions and 127 deletions.
1 change: 1 addition & 0 deletions big_tests/default.spec
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
{suites, "tests", graphql_session_SUITE}.
{suites, "tests", graphql_stanza_SUITE}.
{suites, "tests", graphql_stats_SUITE}.
{suites, "tests", graphql_gdpr_SUITE}.
{suites, "tests", graphql_vcard_SUITE}.
{suites, "tests", graphql_http_upload_SUITE}.
{suites, "tests", graphql_metric_SUITE}.
Expand Down
1 change: 1 addition & 0 deletions big_tests/dynamic_domains.spec
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
{suites, "tests", graphql_vcard_SUITE}.
{suites, "tests", graphql_offline_SUITE}.
{suites, "tests", graphql_stats_SUITE}.
{suites, "tests", graphql_gdpr_SUITE}.
{suites, "tests", graphql_http_upload_SUITE}.
{suites, "tests", graphql_metric_SUITE}.

Expand Down
2 changes: 1 addition & 1 deletion big_tests/tests/gdpr_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ get_personal_data_via_rpc(Client, ExpectedKeys) ->
ClientU = escalus_utils:jid_to_lower(escalus_client:username(Client)),
ClientS = escalus_utils:jid_to_lower(escalus_client:server(Client)),
AllPersonalData = mongoose_helper:successful_rpc(
service_admin_extra_gdpr, get_data_from_modules, [ClientU, ClientS]),
gdpr_api, get_data_from_modules, [ClientU, ClientS]),
%% We don't use lists:filter/2 because this line also ensures order
[ lists:keyfind(Key, 1, AllPersonalData) || Key <- ExpectedKeys ].

Expand Down
96 changes: 96 additions & 0 deletions big_tests/tests/graphql_gdpr_SUITE.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
-module(graphql_gdpr_SUITE).

-compile([export_all, nowarn_export_all]).

-import(distributed_helper, [require_rpc_nodes/1]).
-import(domain_helper, [host_type/0, domain/0]).
-import(graphql_helper, [execute_user/3, execute_auth/2, user_to_bin/1]).
-import(distributed_helper, [mim/0, rpc/4]).

-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").

%% @doc CT suite options: require RPC access to the `mim' node on top of
%% the standard escalus suite configuration.
suite() ->
require_rpc_nodes([mim]) ++ escalus:suite().

%% Top level of the suite: a single admin group.
all() ->
[{group, admin_gdpr}].

%% One group; its test cases are listed in admin_stats_handler/0.
groups() ->
[{admin_gdpr, [], admin_stats_handler()}].

%% Test cases of the admin_gdpr group.
%% NOTE(review): the name `admin_stats_handler' looks like a copy-paste
%% leftover from the stats suite; `admin_gdpr_cases' would describe it better.
admin_stats_handler() ->
[admin_gdpr_test,
admin_gdpr_no_user_test].

%% Standard escalus suite initialisation.
init_per_suite(Config) ->
escalus:init_per_suite(Config).

%% Standard escalus suite teardown.
end_per_suite(Config) ->
escalus:end_per_suite(Config).

%% Every group here talks to the GraphQL admin endpoint, so set it up once
%% per group.
init_per_group(_, Config) ->
graphql_helper:init_admin_handler(Config).

%% Remove any fresh users left over from the group's test cases.
end_per_group(_, _Config) ->
escalus_fresh:clean().

%% Standard escalus per-testcase initialisation.
init_per_testcase(CaseName, Config) ->
escalus:init_per_testcase(CaseName, Config).

%% Clean fresh users after each case, then run the standard escalus teardown.
end_per_testcase(CaseName, Config) ->
escalus_fresh:clean(),
escalus:end_per_testcase(CaseName, Config).

% Admin test cases

%% Runs the happy-path case in a fresh story with one user (alice).
admin_gdpr_test(Config) ->
escalus:fresh_story_with_config(Config, [{alice, 1}], fun admin_gdpr_test/2).

%% Happy path: ask the admin API to retrieve Alice's personal data, then
%% check the reported success and that the produced archive on the mim node
%% is a valid zip by extracting it.
admin_gdpr_test(Config, Alice) ->
    User = escalus_client:username(Alice),
    ZipName = random_filename(Config),
    Vars = #{<<"username">> => User,
             <<"domain">> => escalus_client:server(Alice),
             <<"resultFilepath">> => list_to_binary(ZipName)},
    Reply = admin_retrieve_personal_data(Config, Vars),
    ?assertEqual(<<"Data retrieved">>,
                 ok_result(<<"gdpr">>, <<"retrievePersonalData">>, Reply)),
    ExtractDir = make_dir_name(ZipName, User),
    ct:log("extracting logs ~s", [ExtractDir]),
    ArchivePath = get_mim_cwd() ++ "/" ++ ZipName,
    ?assertMatch({ok, _}, zip:extract(ArchivePath, [{cwd, ExtractDir}])).

%% Requesting personal data for a non-existent user must produce a
%% user_does_not_exist_error error code.
admin_gdpr_no_user_test(Config) ->
    Vars = #{<<"username">> => <<"AAAA">>,
             <<"domain">> => domain(),
             <<"resultFilepath">> => <<"AAA">>},
    Reply = admin_retrieve_personal_data(Config, Vars),
    Code = error_result(<<"extensions">>, <<"code">>, Reply),
    ?assertEqual(<<"user_does_not_exist_error">>, Code).

% Helpers

%% Executes the admin retrievePersonalData GraphQL query with the given
%% variables and returns the raw reply.
admin_retrieve_personal_data(Config, Vars) ->
    Query = <<"query Q1($username: String!, $domain: String!, $resultFilepath: String!)
{gdpr{retrievePersonalData(username: $username, domain: $domain,
resultFilepath: $resultFilepath)}}">>,
    Request = #{query => Query, operationName => <<"Q1">>, variables => Vars},
    execute_auth(Request, Config).

%% Extracts OuterKey -> InnerKey from the single error object of a 200 reply.
error_result(OuterKey, InnerKey, {{<<"200">>, <<"OK">>}, #{<<"errors">> := [ErrorMap]}}) ->
    #{OuterKey := Inner} = ErrorMap,
    maps:get(InnerKey, Inner).

%% Extracts OuterKey -> InnerKey from the data object of a 200 reply.
ok_result(OuterKey, InnerKey, {{<<"200">>, <<"OK">>}, #{<<"data">> := Data}}) ->
    #{OuterKey := Inner} = Data,
    maps:get(InnerKey, Inner).

%% Builds "<testcase>.<system_time>.zip" so repeated runs of the same case
%% do not collide on the file name.
random_filename(Config) ->
    Stamp = integer_to_list(erlang:system_time()),
    atom_to_list(?config(tc_name, Config)) ++ "." ++ Stamp ++ ".zip".

%% Current working directory of the mim node — the archive path is
%% resolved relative to it.
get_mim_cwd() ->
{ok, Cwd} = rpc(mim(), file, get_cwd, []),
Cwd.

%% Returns "<Filename>.<User>.unzipped"; the user may be given as a binary
%% or as a string.
make_dir_name(Filename, User) when is_binary(User) ->
    make_dir_name(Filename, binary_to_list(User));
make_dir_name(Filename, User) when is_list(User) ->
    lists:concat([Filename, ".", User, ".unzipped"]).
2 changes: 2 additions & 0 deletions priv/graphql/schemas/admin/admin_schema.gql
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ type AdminQuery{
metric: MetricAdminQuery
"Statistics"
stats: StatsAdminQuery
"Personal data management according to GDPR"
gdpr: GdprAdminQuery
}

"""
Expand Down
5 changes: 5 additions & 0 deletions priv/graphql/schemas/admin/gdpr.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"Retrieve user's personal data"
type GdprAdminQuery {
"Retrieves all personal data from MongooseIM for a given user"
retrievePersonalData(username: String!, domain: String!, resultFilepath: String!): String
}
129 changes: 3 additions & 126 deletions src/admin_extra/service_admin_extra_gdpr.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,140 +4,17 @@
-include("mongoose_logger.hrl").
-include("jlib.hrl").

-export([commands/0, retrieve_all/3]).

% Exported for RPC call
-export([retrieve_logs/2, get_data_from_modules/2]).

-ignore_xref([commands/0, retrieve_all/3, retrieve_logs/2, get_data_from_modules/2]).

-define(CMD_TIMEOUT, 300000).
-export([commands/0]).
-ignore_xref([commands/0]).

%% @doc The single legacy mongooseimctl command registered by this module.
%% The implementation lives in gdpr_api; only the command metadata stays here.
-spec commands() -> [ejabberd_commands:cmd()].
commands() -> [
        #ejabberd_commands{name = retrieve_personal_data, tags = [gdpr],
                           %% typo fix: "presonal" -> "personal"
                           desc = "Retrieve user's personal data.",
                           longdesc = "Retrieves all personal data from MongooseIM for a given user. Example:\n"
                                      " mongooseimctl retrieve_personal_data alice localhost /home/mim/alice.smith.zip ",
                           module = gdpr_api,
                           function = retrieve_all,
                           args = [{username, binary}, {domain, binary}, {path, binary}],
                           result = {res, rescode}}
    ].

%% @doc Collects all personal data for Username@Domain (module data plus log
%% entries from every cluster node) and writes it as a single zip archive at
%% ResultFilePath. Returns ok, or {error, "User does not exist"} when the
%% user is unknown to the auth backend.
-spec retrieve_all(jid:user(), jid:server(), Path :: binary()) -> ok | {error, Reason :: any()}.
retrieve_all(Username, Domain, ResultFilePath) ->
JID = jid:make(Username, Domain, <<>>),
case user_exists(JID) of
true ->
DataFromModules = get_data_from_modules(JID),
% The contract is that we create personal data files only when there are any items
% returned for the data group.
DataToWrite = lists:filter(fun({_, _, Items}) -> Items /= [] end, DataFromModules),

TmpDir = make_tmp_dir(),

%% One "<group>.csv" file per non-empty data group; the list holds the
%% file names relative to TmpDir for zip:create/3 below.
CsvFiles = lists:map(
fun({DataGroup, Schema, Entries}) ->
BinDataGroup = atom_to_binary(DataGroup, utf8),
FileName = <<BinDataGroup/binary, ".csv">>,
to_csv_file(FileName, Schema, Entries, TmpDir),
binary_to_list(FileName)
end,
DataToWrite),

%% One log extract per cluster node, also written into TmpDir.
LogFiles = get_all_logs(Username, Domain, TmpDir),

ZipFile = binary_to_list(ResultFilePath),
%% zip:create/3 returns {ok, Name}; the match doubles as a success assert.
{ok, ZipFile} = zip:create(ZipFile, CsvFiles ++ LogFiles, [{cwd, TmpDir}]),
%% NOTE(review): TmpDir is not cleaned up if anything above crashes.
remove_tmp_dir(TmpDir),
ok;
false ->
{error, "User does not exist"}
end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Private funs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% @doc Wrapper used over RPC (e.g. by tests): builds a bare JID from the
%% user and domain and delegates to get_data_from_modules/1.
-spec get_data_from_modules(jid:user(), jid:server()) -> gdpr:personal_data().
get_data_from_modules(Username, Domain) ->
JID = jid:make(Username, Domain, <<>>),
get_data_from_modules(JID).

%% @doc Runs the get_personal_data hook for the host type of the JID's
%% domain; each participating module contributes its personal-data entries.
-spec get_data_from_modules(jid:jid()) -> gdpr:personal_data().
get_data_from_modules(JID) ->
{ok, HostType} = mongoose_domain_api:get_domain_host_type(JID#jid.lserver),
mongoose_hooks:get_personal_data(HostType, JID).

%% @doc Writes the schema row followed by the data rows as CSV into
%% TmpDir/Filename. Fails with a badmatch instead of silently producing a
%% truncated or empty file when the write cannot complete.
-spec to_csv_file(file:name_all(), gdpr:schema(), gdpr:entries(), file:name()) -> ok.
to_csv_file(Filename, DataSchema, DataRows, TmpDir) ->
    FilePath = <<(list_to_binary(TmpDir))/binary, "/", Filename/binary>>,
    {ok, File} = file:open(FilePath, [write]),
    Encoded = erl_csv:encode([DataSchema | DataRows]),
    %% The original ignored both results below; a failed write would have
    %% gone unnoticed and ended up as missing data in the user's archive.
    ok = file:write(File, Encoded),
    ok = file:close(File).

%% True iff the JID is a registered user according to the auth backend.
-spec user_exists(jid:jid()) -> boolean().
user_exists(JID) ->
ejabberd_auth:does_user_exist(JID).

%% @doc Creates a uniquely named scratch directory under /tmp and returns
%% its name, retrying on name collisions ({error, eexist}).
-spec make_tmp_dir() -> file:name().
make_tmp_dir() ->
    TmpDirName = lists:flatten(io_lib:format("/tmp/gdpr-~4.36.0b", [rand:uniform(36#zzzz)])),
    case file:make_dir(TmpDirName) of
        ok -> TmpDirName;
        {error, eexist} -> make_tmp_dir();
        %% The original returned {error, Reason} here, violating the -spec
        %% (callers use the result as a directory name) and crashing later
        %% with a confusing error; fail loudly at the point of failure.
        {error, Reason} -> error({failed_to_create_tmp_dir, TmpDirName, Reason})
    end.

%% Deletes every file in TmpDir and then the (now empty) directory itself.
-spec remove_tmp_dir(file:name()) -> ok.
remove_tmp_dir(TmpDir) ->
    {ok, Entries} = file:list_dir(TmpDir),
    lists:foreach(fun(Entry) -> file:delete(TmpDir ++ "/" ++ Entry) end, Entries),
    file:del_dir(TmpDir).

%% @doc Runs the executable Cmd with Args via a port and returns its exit
%% status, or the atom `timeout' if it does not finish within Timeout ms.
-type cmd() :: string() | binary().
-spec run(cmd(), [cmd()], timeout()) -> non_neg_integer() | timeout.
run(Cmd, Args, Timeout) ->
    Port = erlang:open_port({spawn_executable, Cmd}, [exit_status, {args, Args}]),
    receive
        {Port, {exit_status, ExitStatus}} -> ExitStatus
    after Timeout ->
        %% The original leaked the port on timeout. Close it explicitly
        %% (guarding against the race where it exits right now) and drain
        %% any exit_status message that already reached the mailbox.
        try erlang:port_close(Port)
        catch error:badarg -> ok
        end,
        receive {Port, {exit_status, _}} -> ok after 0 -> ok end,
        timeout
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Logs retrieval
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% @doc Entry point used over RPC from other nodes (see
%% get_logs_from_node/4): greps this node's logs for the user and returns
%% the resulting file as an in-memory zip binary.
-spec retrieve_logs(gdpr:username(), gdpr:domain()) -> {ok, ZippedLogs :: binary()}.
retrieve_logs(Username, Domain) ->
TmpDir = make_tmp_dir(),
LogFile = get_logs(Username, Domain, TmpDir),
%% [memory] makes zip:create return {FileName, Binary} instead of writing to disk.
{ok, {_, ZippedLogs}} = zip:create("archive.zip", [LogFile], [memory, {cwd, TmpDir}]),
remove_tmp_dir(TmpDir),
{ok, ZippedLogs}.

%% @doc Extracts the user's log entries on this node and on every other
%% cluster node; all result files are placed in TmpDir and their relative
%% names are returned.
-spec get_all_logs(gdpr:username(), gdpr:domain(), file:name()) -> [file:name()].
get_all_logs(Username, Domain, TmpDir) ->
OtherNodes = mongoose_cluster:other_cluster_nodes(),
LogFile = get_logs(Username, Domain, TmpDir),
LogFilesFromOtherNodes = [get_logs_from_node(Node, Username, Domain, TmpDir) || Node <- OtherNodes],
[LogFile | LogFilesFromOtherNodes].

%% @doc Runs priv/parse_logs.sh over all configured log files and writes
%% the user's entries to TmpDir/logs-<node>.txt; returns that file name.
%% Crashes (badmatch) if the script exits non-zero or times out.
-spec get_logs(gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs(Username, Domain, TmpDir) ->
FileList = [filename:absname(F) || F <- mongoose_logs:get_log_files()],
Cmd = code:priv_dir(mongooseim) ++ "/parse_logs.sh",
FileName = "logs-" ++ atom_to_list(node()) ++ ".txt",
FilePath = TmpDir ++ "/" ++ FileName,
Args = [FilePath, Username, Domain | FileList],
%% run/3 returns the exit status or 'timeout'; both a non-zero status and
%% a timeout fail this match on purpose.
0 = run(Cmd, Args, ?CMD_TIMEOUT),
FileName.

%% @doc Fetches the user's log entries from a remote cluster node (zipped
%% in memory over RPC), unpacks the archive into TmpDir and returns the
%% extracted file's base name.
-spec get_logs_from_node(node(), gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs_from_node(Node, Username, Domain, TmpDir) ->
{ok, ZippedData} = rpc:call(Node, ?MODULE, retrieve_logs, [Username, Domain]),
{ok, [File]} = zip:unzip(ZippedData, [{cwd, TmpDir}]),
filename:basename(File).
Loading

0 comments on commit 8c29117

Please sign in to comment.