From 3d9ea6b1d05094e088257e009bf39b78bde54428 Mon Sep 17 00:00:00 2001 From: Mark Stemm Date: Mon, 18 Jul 2016 09:47:21 -0700 Subject: [PATCH] Multi string search (#624) * Whitespace diffs. Breaking whitespace diffs into a standalone commit. * Implement "in (...)" as hash table lookups. Currently, a filtering expression with "x in (a, b, c, ...)" is transformed into "x=a or x=b or x=c or ...". This can be slow for very long sets of a, b, c, especially when x has to be extracted from the event over and over. Replace this with a set membership test using unordered_set for PT_CHARBUF types. In filter.cpp, for CO_IN operators, check the type of the operand. If it's PT_CHARBUF, loop over the items in the (a, b, c) set and call add_filter_value() for each. Otherwise, the current behavior is kept. sinsp_filter_check::add_filter_value now saves pointers to the filter values in the unordered_set m_val_storages_members, containing pairs of (pointer, length). Note that these are only pointers--the actual values are still held in m_val_storages. In sinsp_filter_check::flt_compare, for CO_IN operators simply check the unordered_set and if a match is found return true. This used to be dead code given how all CO_INs were replaced with a sequence of x=a or x=b or ..., but is now used again. Custom functors g_hash_membuf/g_equal_to_membuf hash and compare the pointed-to data. When compiling with gcc, this simply uses GNU's built-in hash function for a pointer and length, which is quite fast. Otherwise, a standalone function is used. * Take advantage of string length. Take advantage of string length when doing set membership tests: - When comparing strings in the hash equality function, only bother doing the buffer comparison when the string lengths match. - It can be inefficient to hash a very long string when all members of the set are short strings. 
To make this case faster, keep track of the minimum and maximum string length across the set members and only bother doing the set comparison if the lengths overlap. To effectively use this, the length needs to be filled in when sinsp_filter_check::flt_compare() is called, so do a pass over the existing filterchecks and return the actual string length when it's easily known. In flt_compare(), if the type is a string and the provided length is 0, do a strlen() to find it. This should be very rare now that the length is properly passed back. --- userspace/libsinsp/filter.cpp | 184 ++++++++++++++++++---------- userspace/libsinsp/filterchecks.cpp | 87 ++++++++++++- userspace/libsinsp/filterchecks.h | 46 +++++++ 3 files changed, 252 insertions(+), 65 deletions(-) diff --git a/userspace/libsinsp/filter.cpp b/userspace/libsinsp/filter.cpp index 1642dfb58d..636521171b 100644 --- a/userspace/libsinsp/filter.cpp +++ b/userspace/libsinsp/filter.cpp @@ -397,11 +397,11 @@ bool flt_compare(cmpop op, ppm_param_type type, void* operand1, void* operand2, } } -bool flt_compare_avg(cmpop op, - ppm_param_type type, - void* operand1, - void* operand2, - uint32_t op1_len, +bool flt_compare_avg(cmpop op, + ppm_param_type type, + void* operand1, + void* operand2, + uint32_t op1_len, uint32_t op2_len, uint32_t cnt1, uint32_t cnt2) @@ -514,6 +514,8 @@ sinsp_filter_check::sinsp_filter_check() m_aggregation = A_NONE; m_merge_aggregation = A_NONE; m_val_storages = vector> (1, vector(256)); + m_val_storages_min_size = numeric_limits::max(); + m_val_storages_max_size = numeric_limits::min(); } void sinsp_filter_check::set_inspector(sinsp* inspector) @@ -996,6 +998,21 @@ void sinsp_filter_check::add_filter_value(const char* str, uint32_t len, uint16_ } parse_filter_value(str, len, filter_value_p(i), filter_value(i).size()); + + // XXX/mstemm this doesn't work if someone called + // add_filter_value more than once for a given index. 
+ filter_value_member_t item(filter_value_p(i), len); + m_val_storages_members.insert(item); + + if(len < m_val_storages_min_size) + { + m_val_storages_min_size = len; + } + + if(len > m_val_storages_max_size) + { + m_val_storages_max_size = len; + } } @@ -1028,19 +1045,18 @@ bool sinsp_filter_check::flt_compare(cmpop op, ppm_param_type type, void* operan { if (op == CO_IN) { - if (op1_len) + // For raw strings, the length may not be set. So we do a strlen to find it. + if(type == PT_CHARBUF && op1_len == 0) { - throw sinsp_exception("filter error: cannot use 'in' operator with param type "+ to_string(type)); + op1_len = strlen((char *) operand1); } - for (uint16_t i=0; i < m_val_storages.size(); i++) + + filter_value_member_t item((uint8_t *) operand1, op1_len); + if(op1_len >= m_val_storages_min_size && + op1_len <= m_val_storages_max_size && + m_val_storages_members.find(item) != m_val_storages_members.end()) { - if (::flt_compare(CO_EQ, - type, - operand1, - filter_value_p(i))) - { - return true; - } + return true; } return false; } @@ -1599,19 +1615,8 @@ void sinsp_filter_compiler::parse_check() chk->parse_field_name((char *)&operand1[0], true); - // - // In this case we need to create '(field=value1 or field=value2 ...)' - // if(co == CO_IN) { - // - // Separate the 'or's from the - // rest of the conditions - // - m_filter->push_expression(op); - m_last_boolop = BO_NONE; - m_nest_level++; - // // Skip spaces // @@ -1630,56 +1635,109 @@ void sinsp_filter_compiler::parse_check() // m_scanpos++; - // - // The first boolean operand will be BO_NONE - // Then we will start putting BO_ORs - // - op = BO_NONE; - - // - // Create the 'or' sequence - // - while(true) + if(chk->get_field_info()->m_type == PT_CHARBUF) { - // 'in' clause aware - vector operand2 = next_operand(false, true); - // - // Append every sinsp_filter_check creating the 'or' sequence + // For character buffers, we can check all + // values at once by putting them in a set and + // checking for set 
membership. // - sinsp_filter_check* newchk = g_filterlist.new_filter_check_from_another(chk); - newchk->m_boolop = op; - newchk->m_cmpop = CO_EQ; - newchk->add_filter_value((char *)&operand2[0], (uint32_t)operand2.size() - 1); - m_filter->add_check(newchk); + // + // Create the 'or' sequence + // + uint64_t num_values = 0; + while(true) + { + // 'in' clause aware + vector operand2 = next_operand(false, true); - next(); + chk->add_filter_value((char *)&operand2[0], (uint32_t)operand2.size() - 1, num_values); + num_values++; + next(); - if(m_fltstr[m_scanpos] == ')') - { - break; - } - else if(m_fltstr[m_scanpos] == ',') - { - m_scanpos++; + if(m_fltstr[m_scanpos] == ')') + { + break; + } + else if(m_fltstr[m_scanpos] == ',') + { + m_scanpos++; + } + else + { + throw sinsp_exception("expected either ')' or ',' after a value inside the 'in' clause"); + } } - else + m_filter->add_check(chk); + } + else + { + // + // In this case we need to create '(field=value1 or field=value2 ...)' + // + + // + // Separate the 'or's from the + // rest of the conditions + // + m_filter->push_expression(op); + m_last_boolop = BO_NONE; + m_nest_level++; + + // + // The first boolean operand will be BO_NONE + // Then we will start putting BO_ORs + // + op = BO_NONE; + + // + // Create the 'or' sequence + // + uint64_t num_values = 0; + while(true) { - throw sinsp_exception("expected either ')' or ',' after a value inside the 'in' clause"); + // 'in' clause aware + vector operand2 = next_operand(false, true); + + // + // Append every sinsp_filter_check creating the 'or' sequence + // + sinsp_filter_check* newchk = g_filterlist.new_filter_check_from_another(chk); + newchk->m_boolop = op; + newchk->m_cmpop = CO_EQ; + newchk->add_filter_value((char *)&operand2[0], (uint32_t)operand2.size() - 1, num_values); + num_values++; + + m_filter->add_check(newchk); + + next(); + + if(m_fltstr[m_scanpos] == ')') + { + break; + } + else if(m_fltstr[m_scanpos] == ',') + { + m_scanpos++; + } + else + { + 
throw sinsp_exception("expected either ')' or ',' after a value inside the 'in' clause"); + } + + // + // From now on we 'or' every newchk + // + op = BO_OR; } // - // From now on we 'or' every newchk + // Come back to the rest of the filter // - op = BO_OR; + m_filter->pop_expression(); + m_nest_level--; } - - // - // Come back to the rest of the filter - // - m_filter->pop_expression(); - m_nest_level--; } else { diff --git a/userspace/libsinsp/filterchecks.cpp b/userspace/libsinsp/filterchecks.cpp index d40cdf9d48..f4e42131b9 100644 --- a/userspace/libsinsp/filterchecks.cpp +++ b/userspace/libsinsp/filterchecks.cpp @@ -270,6 +270,7 @@ uint8_t* sinsp_filter_check_fd::extract_from_null_fd(sinsp_evt *evt, OUT uint32_ { if(extract_fdname_from_creator(evt, len) == true) { + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -282,6 +283,7 @@ uint8_t* sinsp_filter_check_fd::extract_from_null_fd(sinsp_evt *evt, OUT uint32_ if(extract_fdname_from_creator(evt, len) == true) { m_tstr = m_tinfo->m_container_id + ':' + m_tstr; + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -308,6 +310,7 @@ uint8_t* sinsp_filter_check_fd::extract_from_null_fd(sinsp_evt *evt, OUT uint32_ m_tstr = "/"; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -335,6 +338,7 @@ uint8_t* sinsp_filter_check_fd::extract_from_null_fd(sinsp_evt *evt, OUT uint32_ } m_tstr = m_tinfo->m_container_id + ':' + m_tstr; + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -363,6 +367,7 @@ uint8_t* sinsp_filter_check_fd::extract_from_null_fd(sinsp_evt *evt, OUT uint32_ } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -474,6 +479,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) } sanitize_string(m_tstr); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_FDTYPE: @@ -520,6 +526,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) m_tstr = 
m_tinfo->m_container_id + ':' + m_tstr; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_FILENAME: @@ -550,6 +557,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) m_tstr = "/"; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_FDTYPECHAR: @@ -700,6 +708,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) port = port_to_string(m_fdinfo->m_sockinfo.m_ipv6info.m_fields.m_sport, this->m_fdinfo->get_l4proto(), m_inspector->m_hostname_and_port_resolution_enabled); } + *len = port.size(); return (uint8_t*)port.c_str(); } case TYPE_SERVERPORT: @@ -792,6 +801,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) port = port_to_string(nport, this->m_fdinfo->get_l4proto(), m_inspector->m_hostname_and_port_resolution_enabled); } + *len = port.size(); return (uint8_t*)port.c_str(); } case TYPE_LPORT: @@ -896,6 +906,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) ASSERT(false); } + *len = port.size(); return (uint8_t*)port.c_str(); } @@ -927,6 +938,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) break; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_IS_SERVER: @@ -963,11 +975,13 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) if(m_fdinfo->m_type == SCAP_FD_IPV4_SOCK || m_fdinfo->m_type == SCAP_FD_IPV6_SOCK) { m_tstr = "ip"; + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else if(m_fdinfo->m_type == SCAP_FD_UNIX_SOCK) { m_tstr = "unix"; + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -981,6 +995,7 @@ uint8_t* sinsp_filter_check_fd::extract(sinsp_evt *evt, OUT uint32_t* len) ASSERT(m_tinfo != NULL); m_tstr = to_string(m_tinfo->m_tid) + to_string(m_tinfo->m_lastevent_fd); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } break; @@ -1243,8 +1258,9 @@ bool sinsp_filter_check_fd::compare(sinsp_evt *evt) } 
return flt_compare(m_cmpop, - m_info.m_fields[m_field_id].m_type, - extracted_val); + m_info.m_fields[m_field_id].m_type, + extracted_val, + len); } /////////////////////////////////////////////////////////////////////////////// @@ -1561,6 +1577,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) if(sinfo != NULL) { m_tstr = sinfo->get_comm(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } else @@ -1583,14 +1600,17 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) // At this point pt either doesn't exist or has a different session id. // mt's comm is considered the session leader. m_tstr = mt->get_comm(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } } case TYPE_NAME: m_tstr = tinfo->get_comm(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_EXE: m_tstr = tinfo->get_exe(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_ARGS: { @@ -1608,6 +1628,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_ENV: @@ -1626,6 +1647,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_CMDLINE: @@ -1644,6 +1666,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_EXELINE: @@ -1662,10 +1685,12 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_CWD: m_tstr = tinfo->get_cwd(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_NTHREADS: { @@ -1747,6 +1772,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) if(ptinfo != NULL) { m_tstr = ptinfo->get_comm(); + *len = m_tstr.size(); return 
(uint8_t*)m_tstr.c_str(); } else @@ -1816,6 +1842,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } m_tstr = mt->get_comm(); + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_LOGINSHELLID: @@ -1951,6 +1978,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } case TYPE_CGROUP: @@ -1967,6 +1995,7 @@ uint8_t* sinsp_filter_check_thread::extract(sinsp_evt *evt, OUT uint32_t* len) if(tinfo->m_cgroups[j].first == m_argname) { m_tstr = tinfo->m_cgroups[j].second; + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); } } @@ -2682,6 +2711,7 @@ uint8_t *sinsp_filter_check_event::extract_abspath(sinsp_evt *evt, OUT uint32_t m_strstorage = fullname; + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } @@ -2788,12 +2818,15 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) { case TYPE_TIME: ts_to_string(evt->get_ts(), &m_strstorage, false, true); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); case TYPE_TIME_S: ts_to_string(evt->get_ts(), &m_strstorage, false, false); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); case TYPE_DATETIME: ts_to_string(evt->get_ts(), &m_strstorage, true, true); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); case TYPE_RAWTS: return (uint8_t*)&evt->m_pevt->ts; @@ -2850,6 +2883,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) m_strstorage = m_converter->tostring_nice(NULL, 0, 1000000000); } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } case TYPE_LATENCY_S: @@ -2946,12 +2980,14 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) { case 'h': ts_to_string(evt->get_ts(), &m_strstorage, false, true); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); case 'a': m_strstorage += 
to_string(evt->get_ts() / ONE_SECOND_IN_NS); m_strstorage += "."; m_strstorage += to_string(evt->get_ts() % ONE_SECOND_IN_NS); + *len = m_strstorage.size(); return (uint8_t*) m_strstorage.c_str(); case 'r': @@ -2959,6 +2995,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) m_strstorage += "."; snprintf(timebuffer, sizeof(timebuffer), "%09llu", (evt->get_ts() - m_inspector->m_firstevent_ts) % ONE_SECOND_IN_NS); m_strstorage += string(timebuffer); + *len = m_strstorage.size(); return (uint8_t*) m_strstorage.c_str(); case 'd': @@ -2977,6 +3014,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) m_strstorage = "0.000000000"; } + *len = m_strstorage.size(); return (uint8_t*) m_strstorage.c_str(); } @@ -2996,16 +3034,19 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) m_tsdelta = (tts - m_u64val) % ONE_SECOND_IN_NS; m_u64val = tts; + *len = m_strstorage.size(); return (uint8_t*) m_strstorage.c_str(); } } case TYPE_DIR: if(PPME_IS_ENTER(evt->get_type())) { + *len = 1; return (uint8_t*)">"; } else { + *len = 1; return (uint8_t*)"<"; } case TYPE_TYPE: @@ -3157,6 +3198,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) break; } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); case TYPE_NUMBER: return (uint8_t*)&evt->m_evtnum; @@ -3255,6 +3297,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } break; @@ -3456,6 +3499,7 @@ uint8_t* sinsp_filter_check_event::extract(sinsp_evt *evt, OUT uint32_t* len) return NULL; } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } case TYPE_ISWAIT: @@ -4341,6 +4385,7 @@ uint8_t* sinsp_filter_check_tracer::extract(sinsp_evt *evt, OUT uint32_t* len) case TYPE_TIME: { ts_to_string(evt->get_ts(), &m_strstorage, false, true); + *len = m_strstorage.size(); return 
(uint8_t*)m_strstorage.c_str(); } case TYPE_NTAGS: @@ -4436,6 +4481,7 @@ uint8_t* sinsp_filter_check_tracer::extract(sinsp_evt *evt, OUT uint32_t* len) } } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } case TYPE_ARGS: @@ -4479,6 +4525,7 @@ uint8_t* sinsp_filter_check_tracer::extract(sinsp_evt *evt, OUT uint32_t* len) m_strstorage = m_converter->tostring_nice(NULL, 0, 1000000000); } + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } case TYPE_DURATION_QUANTIZED: @@ -4552,6 +4599,7 @@ uint8_t* sinsp_filter_check_tracer::extract(sinsp_evt *evt, OUT uint32_t* len) case TYPE_RAWTIME: { m_strstorage = to_string(eparser->m_enter_pae->m_time); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } case TYPE_RAWPARENTTIME: @@ -4564,6 +4612,7 @@ uint8_t* sinsp_filter_check_tracer::extract(sinsp_evt *evt, OUT uint32_t* len) } m_strstorage = to_string(pepae->m_time); + *len = m_strstorage.size(); return (uint8_t*)m_strstorage.c_str(); } default: @@ -5235,6 +5284,7 @@ uint8_t* sinsp_filter_check_syslog::extract(sinsp_evt *evt, OUT uint32_t* len) case TYPE_SEVERITY_STR: return (uint8_t*)m_decoder->get_severity_str(); case TYPE_MESSAGE: + *len = m_decoder->m_msg.size(); return (uint8_t*)m_decoder->m_msg.c_str(); default: ASSERT(false); @@ -5286,6 +5336,7 @@ uint8_t* sinsp_filter_check_container::extract(sinsp_evt *evt, OUT uint32_t* len m_tstr = tinfo->m_container_id; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_CONTAINER_NAME: if(tinfo->m_container_id.empty()) @@ -5309,6 +5360,7 @@ uint8_t* sinsp_filter_check_container::extract(sinsp_evt *evt, OUT uint32_t* len m_tstr = container_info.m_name; } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); case TYPE_CONTAINER_IMAGE: if(tinfo->m_container_id.empty()) @@ -5332,6 +5384,7 @@ uint8_t* sinsp_filter_check_container::extract(sinsp_evt *evt, OUT uint32_t* len m_tstr = container_info.m_image; } + *len = m_tstr.size(); return 
(uint8_t*)m_tstr.c_str(); case TYPE_CONTAINER_TYPE: if(tinfo->m_container_id.empty()) @@ -5368,6 +5421,7 @@ uint8_t* sinsp_filter_check_container::extract(sinsp_evt *evt, OUT uint32_t* len break; } } + *len = m_tstr.size(); return (uint8_t*)m_tstr.c_str(); default: ASSERT(false); @@ -5941,6 +5995,7 @@ uint8_t* sinsp_filter_check_fdlist::extract(sinsp_evt *evt, OUT uint32_t* len) m_strval = m_strval.substr(0, m_strval.size() - 1); } + *len = m_strval.size(); return (uint8_t*)m_strval.c_str(); } else @@ -6176,14 +6231,17 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) { case TYPE_K8S_POD_NAME: m_tstr = pod->get_name(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); case TYPE_K8S_POD_ID: m_tstr = pod->get_uid(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); case TYPE_K8S_POD_LABEL: { if(find_label(pod->get_labels(), m_argname, &m_tstr)) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6192,6 +6250,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) case TYPE_K8S_POD_LABELS: { concatenate_labels(pod->get_labels(), &m_tstr); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } case TYPE_K8S_RC_NAME: @@ -6200,6 +6259,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(rc != NULL) { m_tstr = rc->get_name(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6211,6 +6271,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(rc != NULL) { m_tstr = rc->get_uid(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6223,6 +6284,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) { if(find_label(rc->get_labels(), m_argname, &m_tstr)) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } } @@ -6235,6 +6297,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(rc != NULL) { concatenate_labels(rc->get_labels(), &m_tstr); + 
*len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6255,6 +6318,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) m_tstr.append(service->get_name()); } + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6275,6 +6339,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) m_tstr.append(service->get_uid()); } + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6301,6 +6366,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(!m_tstr.empty()) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } } @@ -6317,6 +6383,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) concatenate_labels(service->get_labels(), &m_tstr); } + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6325,6 +6392,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) case TYPE_K8S_NS_NAME: { m_tstr = pod->get_namespace(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } case TYPE_K8S_NS_ID: @@ -6333,6 +6401,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(ns != NULL) { m_tstr = ns->get_uid(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6345,6 +6414,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) { if(find_label(ns->get_labels(), m_argname, &m_tstr)) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } } @@ -6357,6 +6427,7 @@ uint8_t* sinsp_filter_check_k8s::extract(sinsp_evt *evt, OUT uint32_t* len) if(ns != NULL) { concatenate_labels(ns->get_labels(), &m_tstr); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6574,18 +6645,22 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) { case TYPE_MESOS_TASK_NAME: m_tstr = task->get_name(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); case TYPE_MESOS_TASK_ID: m_tstr = task->get_uid(); + *len = 
m_tstr.size(); return (uint8_t*) m_tstr.c_str(); case TYPE_MESOS_TASK_LABEL: if(find_label(task->get_labels(), m_argname, &m_tstr)) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; case TYPE_MESOS_TASK_LABELS: concatenate_labels(task->get_labels(), &m_tstr); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); case TYPE_MESOS_FRAMEWORK_NAME: { @@ -6593,6 +6668,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(fw) { m_tstr = fw->get_name(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; @@ -6603,6 +6679,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(fw) { m_tstr = fw->get_uid(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; @@ -6613,6 +6690,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(app != NULL) { m_tstr = app->get_name(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; @@ -6623,6 +6701,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(app != NULL) { m_tstr = app->get_id(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6633,6 +6712,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) marathon_app::ptr_t app = find_app_by_task(task); if(app && find_label(app->get_labels(), m_argname, &m_tstr)) { + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } @@ -6644,6 +6724,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(app) { concatenate_labels(app->get_labels(), &m_tstr); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; @@ -6654,6 +6735,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(app) { m_tstr = app->get_group_id(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; @@ -6664,6 +6746,7 @@ uint8_t* sinsp_filter_check_mesos::extract(sinsp_evt *evt, OUT uint32_t* len) if(app) { 
m_tstr = app->get_group_id(); + *len = m_tstr.size(); return (uint8_t*) m_tstr.c_str(); } break; diff --git a/userspace/libsinsp/filterchecks.h b/userspace/libsinsp/filterchecks.h index ddf06f2418..99949ba129 100644 --- a/userspace/libsinsp/filterchecks.h +++ b/userspace/libsinsp/filterchecks.h @@ -17,6 +17,7 @@ along with sysdig. If not, see . */ #pragma once +#include #include #include "k8s.h" #include "mesos.h" @@ -42,6 +43,44 @@ class operand_info string m_description; }; +// Used for CO_IN filterchecks using PT_CHARBUFs to allow for quick +// multi-value comparisons. Should also work for any filtercheck with +// a buffer and length. When compiling with gnu compilers, use the +// built in but not standard _hash_impl::hash function, which uses +// murmurhash2 and is quite fast. Otherwise, uses +// http://www.cse.yorku.ca/~oz/hash.html. + +// Used by m_val_storages_members +typedef pair filter_value_member_t; + +struct g_hash_membuf +{ + size_t operator()(filter_value_member_t val) const + { +#ifdef __GNUC__ + return std::_Hash_impl::hash(val.first, val.second); +#else + size_t hash = 5381; + for(uint8_t *p = val.first; p-val.first < val.second; p++) + { + int c = *p; + + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + } + return hash; +#endif + } +}; + +struct g_equal_to_membuf +{ + bool operator()(filter_value_member_t a, filter_value_member_t b) const + { + return (a.second == b.second && + memcmp(a.first, b.first, a.second) == 0); + } +}; + /////////////////////////////////////////////////////////////////////////////// // The filter check interface // NOTE: in order to add a new type of filter check, you need to add a class for @@ -149,6 +188,13 @@ class sinsp_filter_check inline uint8_t* filter_value_p(uint16_t i = 0) { return &m_val_storages[i][0]; } inline vector filter_value(uint16_t i = 0) { return m_val_storages[i]; } + unordered_set m_val_storages_members; + + uint32_t m_val_storages_min_size; + uint32_t m_val_storages_max_size; + const 
filtercheck_field_info* m_field; filter_check_info m_info; uint32_t m_field_id;