[Oar-commits] OAR branch 2.5 updated. 2.5.3+rc4-15-g74757eb

Nicolas Capit capitn at ff-scm-v4-prod.irisa.fr
Wed Apr 24 17:15:21 CEST 2013


The branch, 2.5 has been updated
       via  74757eb9d386149cf5535d8ab37c4dc0e08013b4 (commit)
       via  bf9c4d5dcc9cbe143198e0bcf8ecedf887157f37 (commit)
      from  4c8617d5a2c8fd032f5f6458643539de26ed4cae (commit)


- Log -----------------------------------------------------------------
commit 74757eb
Author: capitn <nicolas.capit at imag.fr>
Date:   Wed Apr 24 17:06:49 2013 +0200

    [scheduler] Fix race condition: resource state
    
    On G5K, when the nodes are set to Absent in the job epilog, a race condition
    can occur if the scheduler launches a job before the module
    node_change_state.pl has switched the resources to Absent.
    As a result, some jobs can be deleted because their nodes become Absent, which
    is not expected.
---
 sources/core/common-libs/lib/OAR/IO.pm |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/sources/core/common-libs/lib/OAR/IO.pm b/sources/core/common-libs/lib/OAR/IO.pm
index 20ad45f..8acf571 100644
--- a/sources/core/common-libs/lib/OAR/IO.pm
+++ b/sources/core/common-libs/lib/OAR/IO.pm
@@ -6383,7 +6383,8 @@ sub get_gantt_jobs_to_launch($$){
                    AND g2.start_time <= $date
                    AND j.state = \'Waiting\'
                    AND resources.resource_id = g1.resource_id
-                   AND resources.state != \'Alive\'
+                   AND (resources.state IN (\'Dead\',\'Suspected\',\'Absent\')
+                        OR resources.next_state IN (\'Dead\',\'Suspected\',\'Absent\'))
               ";
     my $sth = $dbh->prepare($req);
     $sth->execute();
commit bf9c4d5
Author: capitn <nicolas.capit at imag.fr>
Date:   Wed Apr 24 16:50:37 2013 +0200

    [oarsub] jobproperties_applied_after_validation
    
    Add the variable $jobproperties_applied_after_validation. It can be used in an
    admission rule to add a constraint after the validation of the job. Ex:
    
        $jobproperties_applied_after_validation = "maintenance='off'";
    
    So, even if all the resources have "maintenance='on'", the new job will be
    accepted but not scheduled now.
---
 CHANGELOG                              |    7 +++++
 sources/core/common-libs/lib/OAR/IO.pm |   41 +++++++++++++++++++++++++------
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 0bbef9b..6932f1c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -32,6 +32,13 @@ version 2.5.3:
     "oar_sched_gantt_with_timesharing_and_fairsharing_and_quotas")
   - Add comments in user job STDERR files to know if a job was killed or
     checkpointed
+  - Add the variable $jobproperties_applied_after_validation. It can be used in
+    an admission rule to add a constraint after the validation of the job. Ex:
+    
+        $jobproperties_applied_after_validation = "maintenance='off'";
+    
+    So, even if all the resources have "maintenance='on'", the new jobs will be
+    accepted but not scheduled now.
 
 version 2.5.2:
 --------------
diff --git a/sources/core/common-libs/lib/OAR/IO.pm b/sources/core/common-libs/lib/OAR/IO.pm
index 52a5f59..20ad45f 100644
--- a/sources/core/common-libs/lib/OAR/IO.pm
+++ b/sources/core/common-libs/lib/OAR/IO.pm
@@ -1369,6 +1369,10 @@ sub add_micheline_job($$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
         $rules = $rules.$ref->{'rule'};
     }
     $sth->finish();
+    # This variable is used to add restrictions on resource properties, but
+    # only after the validation (the job is queued even if there are not
+    # enough resources available)
+    my $jobproperties_applied_after_validation = "";
     #Apply rules
     eval $rules;
     if ($@) {
@@ -1398,7 +1402,7 @@ sub add_micheline_job($$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
     my @Job_id_list;
     if (($array_job_nb>1)  and (not defined($use_job_key))) { #to test  add_micheline_simple_array_job
       warn("Simple array job submission is used\n"); 
-      my $simple_job_id_list_ref = add_micheline_simple_array_job_non_contiguous($dbh, $dbh_ro, $jobType, $ref_resource_list, \@array_job_commands, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index);
+      my $simple_job_id_list_ref = add_micheline_simple_array_job_non_contiguous($dbh, $dbh_ro, $jobType, $ref_resource_list, \@array_job_commands, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index, $jobproperties_applied_after_validation);
     return($simple_job_id_list_ref);
     } else {
       # single job to submit and when job key is used with array job 
@@ -1408,7 +1412,7 @@ sub add_micheline_job($$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
           push(@Job_id_list, $err);
           return(\@Job_id_list);
         }
-        push(@Job_id_list, add_micheline_subjob($dbh, $dbh_ro, $jobType, $ref_resource_list, $command, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$ssh_priv_key,$ssh_pub_key,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index));
+        push(@Job_id_list, add_micheline_subjob($dbh, $dbh_ro, $jobType, $ref_resource_list, $command, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$ssh_priv_key,$ssh_pub_key,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index, $jobproperties_applied_after_validation));
         if ($Job_id_list[-1] <= 0){
           return(\@Job_id_list);
         } else {
@@ -1487,8 +1491,8 @@ sub format_job_message_text($$$$$$$$$){
     return($job_message);
 }
 
-sub add_micheline_subjob($$$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
-    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $command, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$ssh_priv_key,$ssh_pub_key,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index) = @_;
+sub add_micheline_subjob($$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
+    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $command, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$ssh_priv_key,$ssh_pub_key,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index, $jobproperties_applied_after_validation) = @_;
 
     # Test if properties and resources are coherent
     my @dead_resources;
@@ -1535,6 +1539,13 @@ sub add_micheline_subjob($$$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
         }
         $estimated_walltime = $moldable_resource->[1] if ($estimated_walltime == 0);
     }
+    if ($jobproperties_applied_after_validation ne ""){
+        if ($jobproperties ne ""){
+            $jobproperties = "($jobproperties) AND $jobproperties_applied_after_validation";
+        }else{
+            $jobproperties = "$jobproperties_applied_after_validation";
+        }
+    }
 
     lock_table($dbh,["challenges","jobs"]);
     # Verify the content of the ssh keys
@@ -1684,8 +1695,8 @@ sub add_micheline_subjob($$$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
 # /!\ this function supposes that database engine provides contiguous id when multiple inserts query is executed (Postgres doesn't provide this)
 # 
 
-sub add_micheline_simple_array_job ($$$$$$$$$$$$$$$$$$$$$$$$$$$$){
-    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $array_job_commands_ref, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index) = @_;
+sub add_micheline_simple_array_job ($$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
+    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $array_job_commands_ref, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index, $jobproperties_applied_after_validation) = @_;
 
     my @Job_id_list;
 
@@ -1749,6 +1760,13 @@ sub add_micheline_simple_array_job ($$$$$$$$$$$$$$$$$$$$$$$$$$$$){
         }
         $estimated_walltime = $moldable_resource->[1] if ($estimated_walltime == 0);
     }
+    if ($jobproperties_applied_after_validation ne ""){
+        if ($jobproperties ne ""){
+            $jobproperties = "($jobproperties) AND $jobproperties_applied_after_validation";
+        }else{
+            $jobproperties = "$jobproperties_applied_after_validation";
+        }
+    }
 
     my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime, $jobType, $reservationField, $queue_name, $project, $type_list, '');
 
@@ -1940,8 +1958,8 @@ sub add_micheline_simple_array_job ($$$$$$$$$$$$$$$$$$$$$$$$$$$$){
 # This function doesn't imply that database engine must provides contiguous id when multiple inserts query is executed (Postgres doesn't provide this)
 # 
 
-sub add_micheline_simple_array_job_non_contiguous ($$$$$$$$$$$$$$$$$$$$$$$$$$$$){
-    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $array_job_commands_ref, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index) = @_;
+sub add_micheline_simple_array_job_non_contiguous ($$$$$$$$$$$$$$$$$$$$$$$$$$$$$){
+    my ($dbh, $dbh_ro, $jobType, $ref_resource_list, $array_job_commands_ref, $infoType, $queue_name, $jobproperties, $startTimeReservation, $idFile, $checkpoint, $checkpoint_signal, $notify, $job_name,$job_env,$type_list,$launching_directory,$anterior_ref,$stdout,$stderr,$job_hold,$project,$initial_request_string, $array_id, $user, $reservationField, $startTimeJob, $default_walltime, $array_index, $jobproperties_applied_after_validation) = @_;
 
     my @Job_id_list = ();
     my $nb_jobs = $#{$array_job_commands_ref}+1;
@@ -2006,6 +2024,13 @@ sub add_micheline_simple_array_job_non_contiguous ($$$$$$$$$$$$$$$$$$$$$$$$$$$$)
         }
         $estimated_walltime = $moldable_resource->[1] if ($estimated_walltime == 0);
     }
+    if ($jobproperties_applied_after_validation ne ""){
+        if ($jobproperties ne ""){
+            $jobproperties = "($jobproperties) AND $jobproperties_applied_after_validation";
+        }else{
+            $jobproperties = "$jobproperties_applied_after_validation";
+        }
+    }
 
     my $job_message = format_job_message_text($job_name,$estimated_nb_resources, $estimated_walltime, $jobType, $reservationField, $queue_name, $project, $type_list, '');
 
-----------------------------------------------------------------------

Summary of changes:
 CHANGELOG                              |    7 +++++
 sources/core/common-libs/lib/OAR/IO.pm |   44 +++++++++++++++++++++++++------
 2 files changed, 42 insertions(+), 9 deletions(-)


hooks/post-receive
-- 
OAR



More information about the Oar-commits mailing list