Here's the source code of the script that limits memory and time consumption of a "black-box" process in Linux. Its implementation details are thoroughly described in the blog post.
The bleeding-edge latest version (and the contributions made by others but not yet merged) may be downloaded from the GutHub page for timeout script.
See README with usage information and examples on GitHub as well.
If you find a bug, please, use the issue tracker at GitHub (or make a pull request.) Other, less preferable ways, include writing an e-mail or commenting this post.
The script is licensed under Apache-2.0, and was developed mainly during my day job in ISPRAS while working on Linux Driver Verification project. I designed and implemented the most part of the script, several contributions were made from Alexander Strakh.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 |
#!/usr/bin/perl -w # Copyright 2010-2011 Institute for System Programming # of Russian Academy of Sciences # Copyright 2012 Pavel Shved <pavel.shved@gmail.com> # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Resource monitoring script for limiting black-boxed processes. # It runs an arbitrary process and watches for memory and time consumption. # The main feature is that it watches not only the process spawned, but also # its children--as long as a process doesn't detach ownership from parent (or # doesn't change process group; use -w). sub usage{ print STDERR <<usage_ends; Usage: timeout [-t timelimit] [-m memlimit] [-x herz] command [argumens ...] usage_ends die ; } # We use require_order because we don't want to process options of the command we run use Getopt::Long qw(:config require_order) ; use Time::HiRes qw( gettimeofday ) ; my $timelimit = undef ; my $hanguplimit = undef ; my $kill_stale = '' ; my $ticklimit = undef ; my $memlimit = undef ; my $strpat = undef ; # Output for statistic buckets (STDERR if unspecified) my $output = undef ; my $reference = undef ; my $watchfor = 'tree' ; # Requests per seccond my $frequency = 10; # if we debug my $debug = '' ; # Whether we do not do this term-kill stuff, and just kill processes at once my $just_kill = '' ; # Suppress printing stats when there was no resource violation my $info_on_success = 1; # Do not conceal the exit code of the controlled process if timeout kills it. my $confess = '' ; GetOptions( 'timelimit|t=i' =>\ $timelimit , 'hanguplimit|h=i' =>\ $hanguplimit , 'detect-hangup!' =>\ $kill_stale , 'detect-hangups!' =>\ $kill_stale , # allow-hangups is kept for backward compatibility. 'allow-hangups!' =>\ $kill_stale , 'memlimit|m=i' =>\ $memlimit , 'frequency|x=i' =>\ $frequency , 'pattern|p=s' =>\ $strpat , 'output|o=s' =>\ $output , 'reference|r=s' =>\ $reference , 'watchfor|w=s' =>\ $watchfor , 'debug!' =>\ $debug , 'just-kill' =>\ $just_kill , 'info-on-success!' =>\ $info_on_success , 'confess|c!' =>\ $confess , ) or usage; @ARGV or usage; my $uinfo = get_patterns( $strpat ); my $uwait = int (1_000_000 / $frequency ); my $uflush_time = 100_000; # String to identify thes script's prints in the output my $id_str = $ENV { 'TIMEOUT_IDSTR' } || '' ; use strict; # Fork process and set its process group my $blackbox_pid = fork ; defined $blackbox_pid or die "Couldn't fork: $!" ; unless ( $blackbox_pid ){ # Set the process group of the blackbox process # We do not need to change a process group if we aren't using it to control our jobs unless ( $watchfor eq 'tree' ) { setpgrp 0,0; } local $ " = " "; exec @ARGV or die "Couldn't exec @ARGV: $!" ; } # Make sure we kill forked child on exit sub when_die{ print_uinfo( 'SIGNAL' , $uinfo ); kill_process_group_safely( $blackbox_pid ); exit -1; }; $SIG { 'INT' } = \&when_die; $SIG { 'TERM' } = \&when_die; $SIG { 'QUIT' } = \&when_die; # We sleep between sending TERM and KILL to kids, so we might end up in a regular tick instead of the kill procedure! # Here's a block for that my $dying = 0; use Data::Dumper; # Now we'll just keep polling status of the process until we notice that resources are exhausted or until the child finishes use Time::HiRes qw( ualarm usleep ) ; # see sub update_time for explanations of this structure my $timeinfo = { total => 0, finished => 0, current => {} }; # For this -- update_memory my $meminfo = 0; my $fulltime = 0; # We store the "maximum" used memory (the process may free it and we won't get the proper timestamp at the end). my $maxmem = -1; # Default ticklimit - limit of number of timeout script wakeups (ticks) before we decide that the controlling processes are hang up (if they haven't done any useful work). We use ticks instead of real time seconds because the whole stack may be paused with SIGSTOP, and should not die in this case. # if (! $hanguplimit && $timelimit ) { # If unspecified, then wait for the same time the timelimit is set up $ticklimit = $timelimit *$frequency ; } elsif ( $hanguplimit ) { $ticklimit = $hanguplimit *$frequency ; } my $status = 'wait' ; my $box_status = 0; while ( $status eq 'wait' ){ my $child_errno ; my $child_retv ; # Usually we would just do alarm-waitpid, but in Perl we should do weird evals. # Refer to `perldoc alarm` if surprised. eval { local $SIG { 'ALRM' } = sub { # If we're dying don't return to the embracing eval, return somewhere else! return if $dying ; # Note that this signal can only interrupt "wait" function (unless we're currently in some internals of Perl implementation of waitpid wrapper, but must of the time we spend inside the wait() call). # According to signal(7), wait is a safe function, so we can call anything we want here. $timeinfo = update_time( $blackbox_pid , $timeinfo ); $meminfo = update_memory( $blackbox_pid , $meminfo ); $maxmem = $meminfo if $meminfo > $maxmem ; update_info_by_ucmd( $blackbox_pid , $uinfo ); die "Assume waitpid return 0\n" ; }; ualarm $uwait ; my $arrived = waitpid $blackbox_pid ,0; ualarm 0; die "Assume waitpid return $arrived\n" ; }; print STDERR Dumper( $uinfo ) if $debug ; print STDERR Dumper( $timeinfo ) if $debug ; $child_errno = $!; $child_retv = $?; my $arrived = -1; if ($@ =~ /Assume waitpid return (.*)/){ $arrived = $1; } else { print_uinfo( 'INTERNAL' , $uinfo , $fulltime ); die "Fail: $@" ; } if ( $arrived == $blackbox_pid ){ # Child process terminated. # "Simulate" shell behavior, when signal code is returned as exit code. See http://www.gnu.org/software/bash/manual/html_node/Exit-Status.html for more info. $box_status = child_status_to_exit_code( $child_retv ); $status = 'exit' } elsif ( $arrived == -1){ # Something happened! print_uinfo( 'INTERNAL' , $uinfo ); print "timeout: WARNING: Wait($blackbox_pid) failed: $child_errno\n" ; exit 0; } else { # Check if limits are exhausted (they should be updated by signal handler). # First kill, then print the script's verdict, so that it's less likely to mingle with the output of the process being controlled. if ( my $reason = limits_exceeded()){ kill_process_group_safely( $blackbox_pid ); # have some sleep for output to be flushed usleep( $uflush_time ); print_uinfo( $reason , $uinfo ); # If we killed the child process, we may need to return its error code. if ( $confess ) { if ( waitpid ( $blackbox_pid ,0) != -1){ exit (child_status_to_exit_code($?)); } } else { exit 0; } } } } # 'FINISHED' string has a special meaning in print_uinfo! print_uinfo( 'FINISHED' , $uinfo ) if $info_on_success ; exit $box_status ; #----------------------------------------------- use POSIX; my $ticksize ; BEGIN { $ticksize = POSIX::sysconf(&POSIX::_SC_CLK_TCK) or die "Couldn't get ticksize" ;} # Function that traverses process tree (according to watchfor setting) and invokes the function supplied for each applicable process sub foreach_applicable_process { my ( $pgrp , $watchfor , $sub ) = @_ ; local $_ ; # Depending on whether we count time for process tree or for process group, we use different command. if ( $watchfor eq 'tree' ) { # Read ps output of a process tree, and read a subtree of the pid we watch for # The tree will look like this: # 26944 26944 kdeinit4 # 26944 26948 \_ klauncher # 26944 12501 \_ kio_pop3 # 26944 1591 \_ VirtualBox # 26944 1598 | \_ VirtualBox # 26944 1644 | \_ VBoxXPCOMIPCD # 26944 28333 \_ pidgin # 26944 28581 \_ kio_file # 26944 12496 kmail my $chars = "\t \\_|" ; my $PS_FH ; open $PS_FH , "-|" , qw(ps -e f -o pgrp= -o pid= -o vsz= -o ucmd=) or die "Bad open ps: $!" ; my $state = 0; # 0 - still haven't encounter root; 1 - reading tree; (when tree is read, we break the loop) my $initial_depth = undef ; # Initial depth of the root of a tree while (< $PS_FH >){ /^\s*([0-9]+)\s*([0-9]+)\s*([0-9]+)([ |\\_]+)(.*)/ or next ; # PID depth in process tree my ( $grp , $pid , $vsz , $depth_str , $cmd ) = ($1,$2,$3,$4,$5); if ( $state == 0){ # Still haven't encounter root, check if it's now $pid == $pgrp or next ; $state = 1; $initial_depth = length $depth_str ; } else { # Reading inside process tree, check if it's not over length $depth_str == $initial_depth and last ; } # Ok, this is a node in the tree we want to process $sub ->( $pid , $grp , $cmd , $vsz ); } close $PS_FH or die "Bad close ps: $!" ; } else { # Read ps output to get all processes within a group. Time output is not necessary, since we calculate it directly via /proc my $PS_FH ; open $PS_FH , "-|" , qw(ps -A -o pgrp= -o pid= -o vsz= -o ucmd=) or die "Bad open ps: $!" ; while (< $PS_FH >){ /^\s*([0-9]+)\s*([0-9]+)\s*([0-9]+)\s*(.*)/ or next ; my ( $grp , $pid , $vsz , $cmd ) = ($1,$2,$3); $grp == $pgrp or next ; $sub ->( $pid , $grp , $cmd , $vsz ); } close $PS_FH or die "Bad close ps: $!" ; } } sub hires_proc_runtime { my ( $pid ) = @_ ; my $stat = `cat /proc/ $pid / stat 2>/dev/null`; # Since we invoke this function quite often, process may decease betweem ps invocation and attempt to access its /proc entry. So, we return undef and handle it in the caller. That's also the reason of error redirection to void. return undef unless $stat ; # Parse proc stats--14th is utime, and it's expressed in ticks. my ( undef , undef , undef , undef , undef , undef , undef , undef , undef , undef , undef , undef , undef , $utime_ticks , $stime_ticks , $cum_utime_ticks , $cum_stime_ticks ) = split /\s+/, $stat ; return (( $utime_ticks + $stime_ticks )/ $ticksize , ( $cum_utime_ticks + $cum_stime_ticks )/ $ticksize ); } sub update_time { # Calculate the CPU+SYS time consumed by processes in the process group. Updates special timeinfo structure fur future calculations my ( $pgrp , $timeinfo ) = @_ ; # For one process, cumulative time is its runtime plus runtime of its dead children. Therefore, if we sum up cumulative times for all the eligible processes, we'll get the total runtime of the black box my $cumulative_time = 0; foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid , $grp , $cmd ) = @_ ; # If hires_proc_runtime doesn't return a value (the $pid died before it tried), we keep the old value of time. The error is not greater than ualarm interval. my ( $pid_time , $pid_cum_time ) = hires_proc_runtime( $pid ); if ( defined $pid_time ){ printf STDERR "timeout: pid $pid own $pid_time kids $pid_cum_time\n" if $debug ; $cumulative_time += $pid_time + $pid_cum_time ; } }); my $result = { prev_total => $timeinfo ->{total}, total => $cumulative_time , ticks_stale => ( $timeinfo ->{ticks_stale} || 0)}; # If the time didn't change, increase number of ticks the processes controlled are in a stale state. if ( $timeinfo ->{total} == $cumulative_time ) { $result ->{ticks_stale} ++; } return $result ; } sub update_memory { # Calculate the amount of memory consumed by the process group given my ( $pgrp ) = @_ ; my $result = 0; foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid , $grp , $cmd , $vsz ) = @_ ; $result += $vsz ; }); return $result ; } sub signal_to_process_group_safely { my ( $pgrp , $signal ) = @_ ; if ( $watchfor eq 'tree' ) { # We can't kill the whole process group, so we do the following trick. # We send SIGSTOP to all applicable processes. Since they could have spawned more kids between reading their PID from ps and sending signal, we repeat this step until all processes are stopped my $new_kids_spawned = 1; my %sent_to = (); while ( $new_kids_spawned ) { $new_kids_spawned = 0; foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid ) = @_ ; return if $sent_to { $pid }; $sent_to { $pid } = 1; $new_kids_spawned = 1; kill SIGSTOP, $pid ; }); } # Now all the controlled processes are stopped, we send them the signal we want foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid ) = @_ ; kill $signal , $pid ; }); # Continue the proccesses, so that they can process the signal handler foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid ) = @_ ; kill SIGCONT, $pid ; }); } else { # it's still unclear to me if there won't be a delay between catching signals in different processes when a signal is sent to a whole group. kill SIGSTOP, - $pgrp ; kill $signal , - $pgrp ; kill SIGCONT, - $pgrp ; } } sub kill_process_group_safely { my ( $pgrp ) = @_ ; # Show that we're dying, so that our timely alarm handler doesn't longjmp() control out of here $dying = 1; # Reset alarm handler (we need it for sleep to work) $SIG { 'ALRM' } = 'DEFAULT' ; print STDERR "timeout: Sending TERM\n" if $debug ; signal_to_process_group_safely( $pgrp ,SIGTERM) unless $just_kill ; sleep (1); print STDERR "timeout: Sending KILL\n" if $debug ; signal_to_process_group_safely( $pgrp ,SIGKILL); } sub update_info_by_ucmd { my ( $pgrp , $strpat ) = @_ ; local $_ ; # PIDs that are currently alive my %alive = (); # Collect times and commands of the processes that satisfy the patterns given to the $strpat foreach_applicable_process( $pgrp , $watchfor , sub { my ( $pid , $grp , $ucmd ) = @_ ; # Search process by pattern foreach my $key ( keys %{ $strpat }) { # NOTE that one pattern may match only one of these: either children or not children. That's used to avoid confusion if ( $ucmd =~ m/ $key /) { # Calculate proctime only for the matching processes my ( $proctime , $kidstime ) = hires_proc_runtime( $pid ); # If PID is dead, just don't set %alive for it making time info intact. Its time info will be reconciled later. if ( $proctime ){ $strpat ->{ $key }->{pids}->{ $pid }->{ptime} = $proctime ; $strpat ->{ $key }->{pids}->{ $pid }->{ucmd} = $ucmd ; $alive { $pid } = 1; } } elsif (( $key =~ /^CHILD/) && ( "CHILD:$ucmd" =~ m/ $key /)){ # Calculate proctime only for the matching processes my ( $proctime , $kidstime ) = hires_proc_runtime( $pid ); # If PID is dead, just don't set %alive for it making time info intact. Its time info will be reconciled later. if ( $kidstime ){ $strpat ->{ $key }->{pids}->{ $pid }->{ptime} = $kidstime ; $strpat ->{ $key }->{pids}->{ $pid }->{ucmd} = "CHILD:$ucmd" ; $alive { $pid } = 1; } } } }); # Calculate full time for each pattern for my $key ( keys %{ $strpat }) { my $sk = $strpat ->{ $key }; my $oldtime = $strpat ->{ $key }->{ptime} || 0; # ptime is a sum, and term_time is a total time of terminated PIDs # Increase the time of dead pids, and recalculate runtime of alive pids. my $term_time = $sk ->{term_time} || 0; my $ptime = 0; for my $pid ( keys %{ $sk ->{pids}}) { unless ( exists $alive { $pid }) { $term_time += ( $sk ->{pids}->{ $pid }->{ptime} || 0); delete $sk ->{pids}->{ $pid }; } else { $ptime += $sk ->{pids}->{ $pid }->{ptime}; } } $sk ->{ptime} = $ptime ; $sk ->{term_time} = $term_time ; } return undef ; } # # TODO: If the file already exists, and it contains two or more <time>...</time> # blocks with equals references and name # then we must be calculate summary time and write one <time>..</time> # block instead of more with equlas references. # It needs for rule-instrumentor, that execute aspectator two time for # one cc command. # sub print_uinfo { my $reason = shift ; # Print generic information to STDERR my $ticks = $timeinfo ->{ticks_stale} || 0; printf STDERR "${id_str}%s CPU %.2f MEM %d MAXMEM %d STALE %d\n" , $reason , $timeinfo ->{total}, $meminfo , $maxmem , ceil( $ticks / $frequency ) if ( $reason ne 'FINISHED' ) || $info_on_success ; if ( defined $output ){ open (FIL, ">>" , $output ) or die "Can't open output file: $!\n" ; } else { open (FIL, ">&STDERR" ); } my ( $strpat ) = @_ ; my $reftext = "" ; defined $reference and $reftext = "ref=\"$reference\" " ; # Sum up times for equal names my %name_val = (); foreach my $key ( keys %{ $strpat }) { my $sp = $strpat ->{ $key }; scalar keys %{ $sp ->{pids}} or $sp ->{term_time} or next ; $name_val { $sp ->{name}} ||= 0; $name_val { $sp ->{name}} += ( $sp ->{ptime} + $sp ->{term_time}); } for my $name ( keys %name_val ){ print (FIL "<time ${reftext}name=\"" . $name . "\">" . sprintf ( "%.0f" , 1000 *$name_val { $name }). "</time>\n" ); } defined $output and close FIL; } sub get_patterns { my ( $patterns_in_string ) = @_ ; if ( $patterns_in_string ){ my @splitted_patterns = split (/;/, $patterns_in_string ); my %patterns = (); foreach ( @splitted_patterns ) { my ( $pattern , $name ) = split (/,/, $_ ); printf STDERR "timeout: pattern $pattern for bucket $name initialized\n" if $debug ; $patterns { $pattern } = { name => $name , ptime =>0, pids =>{}}; } return { %patterns }; } else { return { '.*' => { name => 'ALL' , prtime =>0, pids =>{}}}; } } # Check if limits are exhaused, and return the reason why, if any. Otherwise, return undef. sub limits_exceeded { if ( defined $timelimit && $timeinfo ->{total} > $timelimit ){ return 'TIMEOUT' ; } elsif ( defined $ticklimit && $kill_stale && $timeinfo ->{ticks_stale} > $ticklimit ) { # Sometimes the controlling process may inherently hang up. Then we don't interrupt it. return 'HANGUP' ; } elsif ( defined $memlimit && $meminfo > $memlimit ){ return 'MEM' ; } return undef ; } # Convert child exit status to exit code. Follow Bash way. sub child_status_to_exit_code { my ( $child_retv ) = @_ ; if (( $child_retv > 0) && (( $child_retv >> 8) == 0)){ # The 8th bit indicates if the core was dumped. If it was not, we are to add 128 anyway, so just set the bit. return $child_retv | 128; } else { # This is also executed when there was no error, and the result is zero. return $child_retv >> 8; } } |