Skip to content

Commit

Permalink
Improve session termination reason handling (#271)
Browse files Browse the repository at this point in the history
* Improve session termination reason handling

* rework the termination cause handling

* Update METRICS.md

* Update ergw_aaa to 3.6.4

Co-authored-by: Andreas Schultz <[email protected]>
  • Loading branch information
vkatsuba and RoadRunnr committed Dec 10, 2020
1 parent 089cdc9 commit 5b145ab
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 50 deletions.
26 changes: 18 additions & 8 deletions METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ The following metrics exist:
| gtp\_u\_socket\_messages\_processed\_total | counter | name, direction, version, type | Total number of GTP message processed on socket |
| ergw\_local\_pool\_free | gauge | name, type, id | Number of free IPs |
| ergw\_local\_pool\_used | gauge | name, type, id | Number of used IPs |
| termination\_cause\_total | counter | name, type | Total number of termination causes |
| termination\_cause\_total | counter | api, reason | Total number of termination causes |

The label `name` is taken from the configuration of the GTP socket and PeerIP is the IP address of
the peer GSN.
Expand Down Expand Up @@ -192,13 +192,23 @@ For GTPv2-C messages the following types exist:
* version\_not\_supported

The label `api` is the interface that closed the session (`gn/gp`, `s5/s8` or `s11`) and the label `reason` is the termination cause. The following reasons exist:
* normal
* administrative
* link_broken
* upf_failure
* remote_failure
* inactivity_timeout
* peer_restart
| Reason | Description |
| --------------------- | ---------------- |
| normal | Normal session termination |
| administrative | The session is terminated by an administrative action (e.g. draining) |
| link_broken | A session message initiated by the ERGW to the SGW/SGSN is rejected |
| upf_failure | Communication between the ERGW and the UPF failed |
| remote_failure | In a proxy setup, communication to the remote PGW failed |
| peer_restart | The session is terminated because a restart of the remote peer was detected on one of the GTP peer connections the session is associated with |
| cp_inactivity_timeout | The session had no Control Plane (GTP-C) activity within the configured session inactivity timeout |
| up_inactivity_timeout | User plane has reported that the session had no user data transfer within the configured session inactivity timeout |
| 'ASR' | One of the `AAA` interfaces (Gx, Gy, Ro) has disconnected the session |
| error | An unidentified error has been returned for an AAA request |
| req_timeout | An `AAA` request related to the session has timed out. :warning: **Note**: this is only triggered when the `AAA` handler has no configured default answer for this error |
| conn_error | An `AAA` request related to the session failed because no connection was available. :warning: **Note**: this is only triggered when the `AAA` handler has no configured default answer for this error |
| rate_limit | An `AAA` request related to the session failed because the rate limit towards the `AAA` interface instance was reached. :warning: **Note**: this is only triggered when the `AAA` handler has no configured default answer for this error |
| ocs_hold_end | The session was terminated because the OCS Hold duration given by the `AAA` Gy interface has expired |
| peer_reject | An `AAA` peer (e.g. OCS, PCRF) has sent an error result in the response to a request |

The HTTP API exports the metrics in Prometheus format at `/metrics`:

Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ Example of configuration **RADIUS**:
{link_broken, 2},
{upf_failure, 9},
{remote_failure, 9},
{inactivity_timeout, 4},
{cp_inactivity_timeout, 4},
{up_inactivity_timeout, 4},
{'ASR', 6},
{error, 9},
{peer_restart, 7}
]}
]}
Expand Down Expand Up @@ -167,7 +170,10 @@ Example of configuration **ergw-pgw-epc-rf** `function` of **DIAMETER**:
{link_broken, 5},
{upf_failure, 5},
{remote_failure, 1},
{inactivity_timeout, 1},
{cp_inactivity_timeout, 4},
{up_inactivity_timeout, 4},
{'ASR', 6},
{error, 9},
{peer_restart, 1}
]}
%% ...
Expand Down
2 changes: 1 addition & 1 deletion rebar.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
{netdata, {git, "https://github.com/RoadRunnr/erl_netdata.git", {ref, "cbd6eaf"}}},
{gtplib, {git, "https://github.com/travelping/gtplib.git", {branch, "master"}}},
{pfcplib, {git, "https://github.com/travelping/pfcplib.git", {branch, "master"}}},
{ergw_aaa, {git, "git://github.com/travelping/ergw_aaa", {tag, "3.6.2"}}},
{ergw_aaa, {git, "https://github.com/travelping/ergw_aaa.git", {tag, "3.6.4"}}},
{prometheus_diameter_collector, {git, "https://github.com/travelping/prometheus_diameter_collector.git", {tag, "1.0.1"}}}
]}.

Expand Down
4 changes: 2 additions & 2 deletions rebar.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
{ref,"7147d879177f3a9ad88f909a12e41e1c565269b0"}},
1},
{<<"ergw_aaa">>,
{git,"git://github.com/travelping/ergw_aaa",
{ref,"4a55ea63a783ec25dc9a6fed3d9a32f367c6697c"}},
{git,"https://github.com/travelping/ergw_aaa.git",
{ref,"64f64872aadb185e54e637e6b9ba2bbdc0d2f6df"}},
0},
{<<"erlando">>,
{git,"https://github.com/travelping/erlando.git",
Expand Down
14 changes: 9 additions & 5 deletions src/ergw_gtp_gsn_lib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
{parse_transform, cut}]).

-export([connect_upf_candidates/4, create_session/10]).
-export([triggered_charging_event/4, usage_report/3, close_context/2]).
-export([triggered_charging_event/4, usage_report/3, close_context/3]).
-export([update_tunnel_endpoint/3, handle_peer_change/3, update_tunnel_endpoint/2,
apply_bearer_change/5]).

Expand Down Expand Up @@ -224,10 +224,14 @@ triggered_charging_event(ChargeEv, Now, Request,
usage_report(URRActions, UsageReport, #{pfcp := PCtx, 'Session' := Session}) ->
ergw_gtp_gsn_session:usage_report(URRActions, UsageReport, PCtx, Session).

close_context(Reason, #{pfcp := PCtx, 'Session' := Session}) ->
UsageReport = ergw_pfcp_context:delete_session(Reason, PCtx),
ergw_gtp_gsn_session:close_context(Reason, UsageReport, PCtx, Session),
ergw_prometheus:termination_cause(?FUNCTION_NAME, Reason),
%% close_context/3
%%
%% Tear down a session context and record why it was terminated.
%%
%%   API       - passed as first argument to ergw_prometheus:termination_cause/2
%%   TermCause - atom naming the termination cause, or a {API, TermCause} tuple
%%   Context   - map that must contain the keys `pfcp' and 'Session'
%%
%% Returns `ok'. Arguments of any other shape match no clause and fail with
%% function_clause.
%%
%% A {API, TermCause} tuple carries its own API value: the API passed by the
%% caller is ignored and the call is repeated with the unpacked values.
close_context(_, {API, TermCause}, Context) ->
close_context(API, TermCause, Context);
close_context(API, TermCause, #{pfcp := PCtx, 'Session' := Session})
when is_atom(TermCause) ->
%% Delete the PFCP session first; its result is handed on to the session
%% close as the usage report.
UsageReport = ergw_pfcp_context:delete_session(TermCause, PCtx),
ergw_gtp_gsn_session:close_context(TermCause, UsageReport, PCtx, Session),
%% Report the termination for this API / cause pair to the metrics module.
ergw_prometheus:termination_cause(API, TermCause),
ok.

%%====================================================================
Expand Down
4 changes: 2 additions & 2 deletions src/ergw_prometheus.erl
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ declare() ->

%% Termination cause metrics
prometheus_counter:declare([{name, termination_cause_total},
{labels, [name, type]},
{help, "Total number of termination causes"}]),
{labels, [api, reason]},
{help, "Total number of termination causes"}]),
ok.

%%%===================================================================
Expand Down
9 changes: 5 additions & 4 deletions src/ggsn_gn.erl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

-import(ergw_aaa_session, [to_session/1]).

-define(API, 'gn/gp').
-define(T3, 10 * 1000).
-define(N3, 5).

Expand Down Expand Up @@ -237,7 +238,7 @@ handle_request(ReqKey,
handle_request(ReqKey,
#gtp{type = delete_pdp_context_request, ie = _IEs} = Request,
_Resent, _State, #{left_tunnel := LeftTunnel} = Data) ->
ergw_gtp_gsn_lib:close_context(normal, Data),
ergw_gtp_gsn_lib:close_context(?API, normal, Data),
Response = response(delete_pdp_context_response, LeftTunnel, request_accepted),
gtp_context:send_response(ReqKey, Request, Response),
{next_state, shutdown, Data};
Expand All @@ -248,7 +249,7 @@ handle_request(ReqKey, _Msg, _Resent, _State, _Data) ->

handle_response({From, TermCause}, timeout, #gtp{type = delete_pdp_context_request},
_State, Data) ->
ergw_gtp_gsn_lib:close_context(TermCause, Data),
ergw_gtp_gsn_lib:close_context(?API, TermCause, Data),
if is_tuple(From) -> gen_statem:reply(From, {error, timeout});
true -> ok
end,
Expand All @@ -262,7 +263,7 @@ handle_response({From, TermCause},
LeftTunnel = gtp_path:bind(Response, LeftTunnel0),
DataNew = Data#{left_tunnel := LeftTunnel},

ergw_gtp_gsn_lib:close_context(TermCause, Data),
ergw_gtp_gsn_lib:close_context(?API, TermCause, Data),
if is_tuple(From) -> gen_statem:reply(From, {ok, Cause});
true -> ok
end,
Expand Down Expand Up @@ -343,7 +344,7 @@ encode_eua(Org, Number, IPv4, IPv6) ->
pdp_address = <<IPv4/binary, IPv6/binary >>}.

close_context(_Side, Reason, _State, Data) ->
ergw_gtp_gsn_lib:close_context(Reason, Data).
ergw_gtp_gsn_lib:close_context(?API, Reason, Data).

map_attr('APN', #{?'Access Point Name' := #access_point_name{apn = APN}}) ->
unicode:characters_to_binary(lists:join($., APN));
Expand Down
33 changes: 21 additions & 12 deletions src/gtp_context.erl
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,13 @@ tunnel_reg_update(TunnelOld, TunnelNew) ->
Insert = ordsets:subtract(NewKeys, OldKeys),
gtp_context_reg:update(Delete, Insert, ?MODULE, self()).

%% Used in tests only
delete_context(Context) ->
gen_statem:call(Context, delete_context).
gen_statem:call(Context, {delete_context, normal}).

%% Trigger from admin API
trigger_delete_context(Context) ->
gen_statem:cast(Context, delete_context).
gen_statem:cast(Context, {delete_context, administrative}).

%% TODO: add online charging events
collect_charging_events(OldS, NewS) ->
Expand Down Expand Up @@ -395,7 +397,7 @@ handle_event({call, From},
{sx, #pfcp{type = session_report_request,
ie = #{report_type := #report_type{upir = 1}}}},
State, #{pfcp := PCtx} = Data) ->
close_context(both, inactivity_timeout, State, Data),
close_context(both, up_inactivity_timeout, State, Data),
{next_state, shutdown, Data, [{reply, From, {ok, PCtx}}]};

%% Usage Report
Expand Down Expand Up @@ -443,9 +445,9 @@ handle_event(cast, {handle_response, ReqInfo, Request, Response0}, State,
erlang:raise(Class, Reason, Stacktrace)
end;

handle_event(info, #aaa_request{procedure = {_, 'ASR'}} = Request, State, Data) ->
handle_event(info, #aaa_request{procedure = {_, 'ASR'} = Procedure} = Request, State, Data) ->
ergw_aaa_session:response(Request, ok, #{}, #{}),
delete_context(undefined, administrative, State, Data);
delete_context(undefined, Procedure, State, Data);

handle_event(info, #aaa_request{procedure = {gx, 'RAR'},
events = Events} = Request,
Expand Down Expand Up @@ -553,8 +555,12 @@ handle_event(internal, {session, {update_credits, _} = CreditEv, _}, _State,
end,
{keep_state, Data#{pfcp := PCtx, pcc := PCC}};

%% Enable AAA to provide reason for session stop
handle_event(internal, {session, {stop, Reason}, _Session}, State, Data) ->
delete_context(undefined, Reason, State, Data);

handle_event(internal, {session, stop, _Session}, State, Data) ->
delete_context(undefined, normal, State, Data);
delete_context(undefined, error, State, Data);

handle_event(internal, {session, Ev, _}, _State, _Data) ->
?LOG(error, "unhandled session event: ~p", [Ev]),
Expand All @@ -569,6 +575,9 @@ handle_event(info, {timeout, TRef, pfcp_timer} = Info, _State, #{pfcp := PCtx0}
ergw_gtp_gsn_lib:triggered_charging_event(validity_time, Now, ChargingKeys, Data),
{keep_state, Data};

handle_event({call, From}, {delete_context, Reason}, State, Data)
when State == connected; State == connecting ->
delete_context(From, Reason, State, Data);
handle_event({call, From}, delete_context, State, Data)
when State == connected; State == connecting ->
delete_context(From, administrative, State, Data);
Expand Down Expand Up @@ -598,8 +607,8 @@ handle_event(cast, {usage_report, URRActions, UsageReport}, _State, Data) ->
ergw_gtp_gsn_lib:usage_report(URRActions, UsageReport, Data),
keep_state_and_data;

handle_event(cast, delete_context, State, Data) ->
delete_context(undefined, administrative, State, Data);
handle_event(cast, {delete_context, Reason}, State, Data) ->
delete_context(undefined, Reason, State, Data);

handle_event(info, {'DOWN', _MonitorRef, Type, Pid, _Info}, State,
#{pfcp := #pfcp_ctx{node = Pid}} = Data)
Expand Down Expand Up @@ -861,11 +870,11 @@ fteid_tunnel_side_f(#f_teid{ipv4 = IPv4, ipv6 = IPv6, teid = TEID},
fteid_tunnel_side_f(FqTEID, {_, _, Iter}) ->
fteid_tunnel_side_f(FqTEID, maps:next(Iter)).

close_context(Side, TermCause, State, #{interface := Interface} = Data) ->
Interface:close_context(Side, TermCause, State, Data).
close_context(Side, Reason, State, #{interface := Interface} = Data) ->
Interface:close_context(Side, Reason, State, Data).

delete_context(From, TermCause, State, #{interface := Interface} = Data) ->
Interface:delete_context(From, TermCause, State, Data).
delete_context(From, Reason, State, #{interface := Interface} = Data) ->
Interface:delete_context(From, Reason, State, Data).

%%====================================================================
%% asynchronous usage reporting
Expand Down
9 changes: 5 additions & 4 deletions src/pgw_s5s8.erl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

-import(ergw_aaa_session, [to_session/1]).

-define(API, 's5/s8').
-define(GTP_v1_Interface, ggsn_gn).
-define(T3, 10 * 1000).
-define(N3, 5).
Expand Down Expand Up @@ -383,7 +384,7 @@ handle_request(ReqKey,
case match_tunnel(?'S5/S8-C SGW', LeftTunnel, FqTEID) of
ok ->
process_secondary_rat_usage_data_reports(IEs, Context, Session),
ergw_gtp_gsn_lib:close_context(normal, Data),
ergw_gtp_gsn_lib:close_context(?API, normal, Data),
Response = response(delete_session_response, LeftTunnel, request_accepted),
gtp_context:send_response(ReqKey, Request, Response),
{next_state, shutdown, Data};
Expand Down Expand Up @@ -432,7 +433,7 @@ handle_response(_, timeout, #gtp{type = update_bearer_request}, connected = Stat

handle_response({From, TermCause}, timeout, #gtp{type = delete_bearer_request},
_State, Data) ->
ergw_gtp_gsn_lib:close_context(TermCause, Data),
ergw_gtp_gsn_lib:close_context(?API, TermCause, Data),
if is_tuple(From) -> gen_statem:reply(From, {error, timeout});
true -> ok
end,
Expand All @@ -449,7 +450,7 @@ handle_response({From, TermCause},
DataNew = Data#{left_tunnel => LeftTunnel},

process_secondary_rat_usage_data_reports(IEs, Context, Session),
ergw_gtp_gsn_lib:close_context(TermCause, DataNew),
ergw_gtp_gsn_lib:close_context(?API, TermCause, DataNew),
if is_tuple(From) -> gen_statem:reply(From, {ok, RespCause});
true -> ok
end,
Expand Down Expand Up @@ -527,7 +528,7 @@ encode_paa(Type, IPv4, IPv6) ->
#v2_pdn_address_allocation{type = Type, address = <<IPv6/binary, IPv4/binary>>}.

close_context(_Side, Reason, _State, Data) ->
ergw_gtp_gsn_lib:close_context(Reason, Data).
ergw_gtp_gsn_lib:close_context(?API, Reason, Data).

map_attr('APN', #{?'Access Point Name' := #v2_access_point_name{apn = APN}}) ->
unicode:characters_to_binary(lists:join($., APN));
Expand Down
9 changes: 5 additions & 4 deletions src/saegw_s11.erl
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

-import(ergw_aaa_session, [to_session/1]).

-define(API, 's11').
-define(GTP_v1_Interface, ggsn_gn).
-define(T3, 10 * 1000).
-define(N3, 5).
Expand Down Expand Up @@ -308,7 +309,7 @@ handle_request(ReqKey,

case match_tunnel(?'S11-C MME', LeftTunnel, FqTEID) of
ok ->
ergw_gtp_gsn_lib:close_context(normal, Data),
ergw_gtp_gsn_lib:close_context(?API, normal, Data),
Response = response(delete_session_response, LeftTunnel, request_accepted),
gtp_context:send_response(ReqKey, Request, Response),
{next_state, shutdown, Data};
Expand Down Expand Up @@ -352,7 +353,7 @@ handle_response(_, timeout, #gtp{type = update_bearer_request}, connected = Stat

handle_response({From, TermCause}, timeout, #gtp{type = delete_bearer_request},
_State, Data) ->
ergw_gtp_gsn_lib:close_context(TermCause, Data),
ergw_gtp_gsn_lib:close_context(?API, TermCause, Data),
if is_tuple(From) -> gen_statem:reply(From, {error, timeout});
true -> ok
end,
Expand All @@ -366,7 +367,7 @@ handle_response({From, TermCause},

DataNew = Data#{left_tunnel => LeftTunnel},

ergw_gtp_gsn_lib:close_context(TermCause, Data),
ergw_gtp_gsn_lib:close_context(?API, TermCause, Data),
if is_tuple(From) -> gen_statem:reply(From, {ok, Cause});
true -> ok
end,
Expand Down Expand Up @@ -444,7 +445,7 @@ encode_paa(Type, IPv4, IPv6) ->
#v2_pdn_address_allocation{type = Type, address = <<IPv6/binary, IPv4/binary>>}.

close_context(_Side, Reason, _State, Data) ->
ergw_gtp_gsn_lib:close_context(Reason, Data).
ergw_gtp_gsn_lib:close_context(?API, Reason, Data).

copy_ppp_to_session({pap, 'PAP-Authentication-Request', _Id, Username, Password}, Session0) ->
Session = Session0#{'Username' => Username, 'Password' => Password},
Expand Down
Loading

0 comments on commit 5b145ab

Please sign in to comment.