I have a Kusto table that has the following structure:
Name | File | IngestType |
---|---|---|
A | F1 | output |
B | F1 | input |
B | F2 | output |
C | F2 | input |
D | F2 | input |
I want to start with a given Name, say A
and run a query where I can build a flat tree up to a certain level. So far my solution is to write a query for each level and union the data in the end:
let parent = table | where Name == 'A';
let child_level_1 = table | where type == 'input'
| join kind=inner parent on File
| ... // project own output logic for child_level_2 to consume
let child_level_2 = table | where type == 'input'
| join kind=inner child_level_1 on File
...
let child_level_10 = ...
Could the above be built dynamically in Kusto? Something on the lines of "while children have outputs consumed by others, keep query-ing"
Could the above be built dynamically in Kusto? Something on the lines of "while children have outputs consumed by others, keep query-ing"
No. What you wrote above (using child_level_i) is the only way.
Kusto supports now directed graph, which would allow you to achieve what you want by building a process tree then filtering out branches https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/graph-operators
Have a look there: https://github.com/microsoft/Kusto-Query-Language/issues/133#issuecomment-1868939619
// working dummy data
let Data = datatable (Computer:string, process_parent_command_line:string, process_command_line:string, process_parent_id:string, process_id :string)[
"ComputerA", "root cmd", "cmd lvl1", 0, 1,
"ComputerA", "cmd lvl1", "cmd lvl2", 1, 2,
"ComputerA", "cmd lvl1", "cmd lvl1.2",1, 1.2,
"ComputerA", "cmd lvl2", "cmd lvl3", 2, 3,
"ComputerA", "cmd lvl3", "cmd /C reg load lvl4", 3, 4,
"ComputerA", "cmd /C reg load lvl4", "cmd reg load lvl5", 4, 5,
"ComputerA", "cmd reg load lvl5", "cmd lvl6",5, 6,
// Computer B, branch 000, reg load 1
"ComputerB", "root cmd - branch 00000", "cmd lvl1", 10, 11,
"ComputerB", "cmd lvl1", "cmd /C reg load lvl2", 11, 12,
"ComputerB", "cmd /C reg load lvl2", "cmd reg load lvl3", 12, 13,
// Computer B bracnh 000 noise
"ComputerB", "cmd lvl3", "cmd lvl4", 13, 14,
"ComputerB", "cmd lvl4", "cmd lvl5", 14, 15,
"ComputerB", "cmd lvl5", "cmd lvl6",15, 16,
// Computer B Branch AAAA - reg load - longuest
"ComputerB", "root cmd - branch AAA", "cmd lvl1 - branch AAA", 20, 21,
"ComputerB", "cmd lvl1 - branch AAA", "cmd lvl2 - branch AAA", 21, 22,
"ComputerB", "cmd lvl2 - branch AAA", "cmd lvl3 - branch AAA",22, 23,
"ComputerB", "cmd lvl3 - branch AAA", "cmd reg load", 23,24,
// Computer B Branch BBBB - reg load - shortest
"ComputerB", "root cmd - branch BBB", "cmd lvl1 - branch BBB", 30,31,
"ComputerB", "cmd lvl1 - branch BBB", "cmd reg load", 31,32,
]
// create requiered column for graph
// adapt values if required
| project parent_id = strcat_delim("_",Computer, process_parent_id, process_parent_command_line),
child_id = strcat_delim("_", Computer, process_id, process_command_line),
Computer, process_command_line,
process_parent_command_line,
user_name = "bob",
RenderedDescription = "process launched",
EventID = 1,
file_directory = "C:/",
process_id, process_parent_id,
process_path = replace_string(process_command_line, "cmd","path"),
process_parent_path = replace_string(process_parent_command_line, "cmd","path")
;
// build node info table for graph
// needs to list all unique cmd/parent cmd in a one column
let process_info =// materialize(
Data
// reduce data amount
// create a row pre process child with its info
| distinct parent_id, child_id,
user_name, process_command_line, process_parent_command_line, process_path, process_parent_path, Computer,
RenderedDescription, EventID, file_directory, process_id, process_parent_id
| as hint.materialized=false unique_cmd_child_parent // false more efficient here
| project node_id = child_id,
user_name, process_command_line, process_path, Computer, RenderedDescription, file_directory, process_id
// create a row pre process parent with its info
| union (unique_cmd_child_parent
| project node_id = parent_id,
user_name, process_command_line=process_parent_command_line, process_path = process_parent_path,
Computer, RenderedDescription, file_directory, process_id = process_parent_id
)
| distinct node_id, user_name, process_command_line, process_path, process_id, Computer, RenderedDescription, file_directory
// )
;
Data
| project parent_id, child_id,
process_parent_command_line, process_command_line // parent/child ids are built on cmd and computer
| make-graph parent_id --> child_id with process_info on node_id
//| make-graph process_parent_command_line --> process_command_line with process_info on process_command_line
| graph-match (parent) -[edge*1..10]-> (child)
where child.process_command_line has "reg"
and child.process_command_line has_any("load","unload")
//
//and not( // keeps only A -> /c reg load and not A -> /C reg load -> reg load
// parent.process_command_line has "reg"
// and parent.process_command_line has_any("load","unload")
// )
project root_parent_user_name = parent.user_name,
root_cmd = parent.process_command_line,
root_parent_path = parent.process_path,
root_nodeId = parent.node_id,
process_branch = todynamic(edge.process_parent_command_line),
node_id_list = todynamic(edge.child_id),
last_child_nodeId = child.node_id,
last_child_cmd = child.process_command_line,
last_child_pid = child.process_id,
Computer = parent.Computer
// back to tabular mode
| extend process_branch_length = array_length(process_branch)
| summarize process_branch_length = arg_max(process_branch_length, *) by last_child_cmd, Computer, last_child_pid // keep only the longest branch for a child_cmd by computer
// deals with case where branch is 'root cmd' --> 'parent cmd /c reg load' -> 'cmd /c reg load' => 2 results at previous step
// Note that some similar cmd might looks duplicated (ex: %%systemDrive%% vs %systemDrive%)
| order by Computer, process_branch_length desc
| where not(prev(Computer) == Computer
//and prev(last_child_cmd) == process_branch[-1]
and prev(process_branch_length) > process_branch_length
and prev(root_cmd) == root_cmd
)
| project-reorder Computer, root_parent_user_name, root_cmd, last_child_cmd, process_branch
if you are looking for a particular process branch, you could target it by defining the process tree as a time sequence of processes launched or command-lines with evaluate sequence_detect plugin.
datatable (Time: datetime, parent:string, child: string)[
'2023-01-13T14:01:06.3120799Z', "Grand parent", "parent",
'2023-01-13T14:01:07.3120799Z', "parent", "child",
'2023-01-13T14:01:08.3120799Z', "child", "grand child",
'2023-01-13T14:01:08.3120799Z', "noise parent", "noise child"
'2023-01-13T14:01:08.3120799Z', "noise child", "noise grandchild"
]
| evaluate sequence_detect(Time, // time column to look for sequence
3s, // timespan allowed for the sequence completion
2s, // max timespan between each step
// process tree as condition
GrandParentFound = parent == "Grand parent" and child == "parent",
ParentFound = parent == "parent" and child == "child",
ChildFound = parent == "child" and child == "grand child"
)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With