code sync

This commit is contained in:
Daniel Eisenberg 2025-02-10 21:30:14 +02:00
parent 85dbcf4714
commit 613c878568
4 changed files with 153 additions and 4 deletions

View File

@ -98,19 +98,19 @@ while true; do
init=true init=true
/etc/cp/watchdog/cp-nano-watchdog >/dev/null 2>&1 & /etc/cp/watchdog/cp-nano-watchdog >/dev/null 2>&1 &
sleep 5 sleep 5
active_watchdog_pid=$(pgrep -f -x -o "/bin/bash /etc/cp/watchdog/cp-nano-watchdog") active_watchdog_pid=$(pgrep -f -x -o "/bin/(bash|sh) /etc/cp/watchdog/cp-nano-watchdog")
fi fi
current_watchdog_pid=$(pgrep -f -x -o "/bin/bash /etc/cp/watchdog/cp-nano-watchdog") current_watchdog_pid=$(pgrep -f -x -o "/bin/(bash|sh) /etc/cp/watchdog/cp-nano-watchdog")
if [ ! -f /tmp/restart_watchdog ] && [ "$current_watchdog_pid" != "$active_watchdog_pid" ]; then if [ ! -f /tmp/restart_watchdog ] && [ "$current_watchdog_pid" != "$active_watchdog_pid" ]; then
echo "Error: Watchdog exited abnormally" echo "Error: Watchdog exited abnormally"
exit 1 exit 1
elif [ -f /tmp/restart_watchdog ]; then elif [ -f /tmp/restart_watchdog ]; then
rm -f /tmp/restart_watchdog rm -f /tmp/restart_watchdog
kill -9 "$(pgrep -f -x -o "/bin/bash /etc/cp/watchdog/cp-nano-watchdog")" kill -9 "$(pgrep -f -x -o "/bin/(bash|sh) /etc/cp/watchdog/cp-nano-watchdog")"
/etc/cp/watchdog/cp-nano-watchdog >/dev/null 2>&1 & /etc/cp/watchdog/cp-nano-watchdog >/dev/null 2>&1 &
sleep 5 sleep 5
active_watchdog_pid=$(pgrep -f -x -o "/bin/bash /etc/cp/watchdog/cp-nano-watchdog") active_watchdog_pid=$(pgrep -f -x -o "/bin/(bash|sh) /etc/cp/watchdog/cp-nano-watchdog")
fi fi
sleep 5 sleep 5

View File

@ -26,6 +26,7 @@ install(FILES configuration/cp-nano-orchestration-debug-conf.json DESTINATION ./
install(FILES watchdog/watchdog DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ) install(FILES watchdog/watchdog DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ)
install(FILES watchdog/wait-for-networking-inspection-modules.sh DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ) install(FILES watchdog/wait-for-networking-inspection-modules.sh DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ)
install(FILES watchdog/access_pre_init DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ) install(FILES watchdog/access_pre_init DESTINATION ./orchestration/watchdog/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ)
# Ship the orchestrator revert helper alongside the other watchdog scripts.
install(
  FILES watchdog/revert_orchestrator_version.sh
  DESTINATION ./orchestration/watchdog/
  PERMISSIONS
    OWNER_READ OWNER_WRITE OWNER_EXECUTE
    GROUP_READ GROUP_EXECUTE
    WORLD_READ
)
install(FILES local-default-policy.yaml DESTINATION ./orchestration/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ) install(FILES local-default-policy.yaml DESTINATION ./orchestration/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ)
install(FILES open-appsec-cloud-mgmt DESTINATION ./orchestration/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ) install(FILES open-appsec-cloud-mgmt DESTINATION ./orchestration/ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ)

View File

@ -0,0 +1,79 @@
#!/bin/sh
# Re-installs the last known working orchestrator after a failed upgrade.
# Usage: revert_orchestrator_version.sh <log-file>
# The installation root is taken to be the parent of the directory holding this script.
SCRIPT_FOLDER=$(dirname "$0")
PARENT_FOLDER=$(dirname "$SCRIPT_FOLDER")
FILESYSTEM_PATH=$PARENT_FOLDER
# State files kept under <root>/revert; the second whitespace-separated field of
# the upgrade status file is what this script records as a forbidden version.
UPGRADE_STATUS_FILE=${FILESYSTEM_PATH}/revert/upgrade_status
FORBIDDEN_VERSIONS_FILE=${FILESYSTEM_PATH}/revert/forbidden_versions
LAST_KNOWN_WORKING_ORCHESTRATOR=${FILESYSTEM_PATH}/revert/last_known_working_orchestrator
# First positional argument: the file log() appends to.
LOG_FILE=$1
# JSON configuration scanned by get_configuration_with_default().
CONFIG_FILE="${FILESYSTEM_PATH}/conf/cp-nano-orchestration-conf.json"
# get_configuration_with_default <section> <key> <default>
#
# Scans $CONFIG_FILE line by line: once a line containing "<section>" and, after
# it, a line containing "<key>" have been seen, the value on the next line that
# contains "value" is printed (surrounding quotes removed). If the key is never
# found, <default> is printed instead. A found key whose value is empty prints
# an empty line.
#
# Only POSIX awk features are used (two-argument match() with RSTART/RLENGTH),
# so the function behaves the same under gawk, mawk and BusyBox awk; the
# three-argument match() form is a gawk extension.
get_configuration_with_default()
{
    section="$1"
    key="$2"
    default_value="$3"
    local value
    value=$(awk -v section="$section" -v k="$key" -v def_value="$default_value" '
        BEGIN {
            found_section = 0
            found_key = 0
        }
        $0 ~ "\"" section "\"" { found_section = 1; next }
        found_section && $0 ~ "\"" k "\"" { found_key = 1; next }
        found_key && /"value"/ {
            # Skip past the leftmost "value": prefix (and an optional opening quote),
            # then keep everything up to the first quote, comma or closing brace.
            if (match($0, /"value"[[:space:]]*:[[:space:]]*"?/)) {
                extracted = substr($0, RSTART + RLENGTH)
                sub(/[",}].*$/, "", extracted)
                if (extracted != "")
                    print extracted
            }
            exit
        }
        found_section && /^\}/ { found_section = 0; found_key = 0 }
        END {
            if (!found_key) print def_value
        }
    ' "$CONFIG_FILE")
    echo "$value"
}
# log <caller> <message>
#
# Appends "[<timestamp>@<caller>] <message>" to $LOG_FILE. The redirect target
# is quoted so a log path containing whitespace works in every shell. When no
# log file was supplied the line goes to stderr instead of failing on an empty
# redirect target.
log()
{
    curr_date_time=$(date +%Y-%m-%dT%H:%M:%S)
    callee_function=${1}
    if [ -n "${LOG_FILE}" ]; then
        echo "[${curr_date_time}@${callee_function}] ${2}" >>"${LOG_FILE}"
    else
        echo "[${curr_date_time}@${callee_function}] ${2}" >&2
    fi
}
# Main flow:
#   1. If an upgrade status file exists, record the version it names as
#      forbidden and keep a copy of the file as failed_upgrade_info.
#   2. If a last known working orchestrator is available, restore the saved
#      manifest and run that orchestrator with --install (plus optional
#      --certs-dir / --vs_id flags). Otherwise log the problem and exit 1.
ENV_DETAILS_FILE="${FILESYSTEM_PATH}/conf/environment-details.cfg"
to_version=""
if [ -f "$UPGRADE_STATUS_FILE" ]; then
    awk '{print $2}' "$UPGRADE_STATUS_FILE" >> "$FORBIDDEN_VERSIONS_FILE"
    cp "$UPGRADE_STATUS_FILE" "${FILESYSTEM_PATH}/revert/failed_upgrade_info"
    # Only read the target version when the status file is really there.
    to_version=$(awk '{print $2}' "$UPGRADE_STATUS_FILE")
fi
if [ -f "$LAST_KNOWN_WORKING_ORCHESTRATOR" ]; then
    manifest_file_path=$(get_configuration_with_default "orchestration" "Manifest file path" "${FILESYSTEM_PATH}/conf/manifest.json")
    cp "${FILESYSTEM_PATH}/revert/last_known_manifest" "$manifest_file_path"
    # Make the saved orchestrator executable before its first invocation (--version).
    chmod +x "$LAST_KNOWN_WORKING_ORCHESTRATOR"
    last_known_orch_version=$("$LAST_KNOWN_WORKING_ORCHESTRATOR" --version)
    log "revert_orchestrator_version.sh" "Reverting orchestration version $to_version to last known working orchestrator (version: $last_known_orch_version)"
    installation_flags="--install"
    trusted_ca_directory=$(get_configuration_with_default "message" "Trusted CA directory" "")
    if [ -n "$trusted_ca_directory" ]; then
        installation_flags="${installation_flags} --certs-dir ${trusted_ca_directory}"
    fi
    # Read CP_VS_ID from the same file that was tested for it.
    if [ -f "$ENV_DETAILS_FILE" ] && grep -q '^CP_VS_ID=' "$ENV_DETAILS_FILE"; then
        cp_vs_id=$(grep '^CP_VS_ID=' "$ENV_DETAILS_FILE" | cut -d'=' -f2)
        installation_flags="${installation_flags} --vs_id ${cp_vs_id}"
    fi
    # installation_flags is intentionally unquoted: it holds several separate arguments.
    "$LAST_KNOWN_WORKING_ORCHESTRATOR" ${installation_flags}
else
    log "revert_orchestrator_version.sh" "Last known working orchestrator not found"
    exit 1
fi

View File

@ -36,6 +36,8 @@ TMP_VOL_SRVS_FILE_PRE_DEL=watchdog/wd.volatile_services.del
SRVS_HALTED=watchdog/wd.services.halt SRVS_HALTED=watchdog/wd.services.halt
SERVICE_LOG_FILE_TTL_MINUTES=10080 SERVICE_LOG_FILE_TTL_MINUTES=10080
PIDOF_CMD_EXISTS=0 PIDOF_CMD_EXISTS=0
CONFIG_FILE="${FILESYSTEM_PATH}/conf/cp-nano-orchestration-conf.json"
SETTINGS_FILE="${FILESYSTEM_PATH}/conf/settings.json"
env_details_file=conf/environment-details.cfg env_details_file=conf/environment-details.cfg
@ -48,6 +50,41 @@ VS_EVAL_PREFIX=
var_service_startup= var_service_startup=
var_upgarde=false var_upgarde=false
# get_profile_agent_setting_with_default <key> <default>
#
# Looks in $SETTINGS_FILE for the first line containing "key": "<key>" and
# prints the first non-empty "value": "..." string that follows it on that
# line. Prints <default> when the file is missing, the key is absent, or the
# value is empty or the literal string "null".
#
# Implemented with POSIX awk rather than `grep -P` / `sed` with `\s`, which are
# GNU extensions and not available in BusyBox-based images. Only the first
# match is returned so callers always receive a single-line value.
get_profile_agent_setting_with_default() {
    key="$1"
    default_value="$2"
    value=""
    if [ -f "$SETTINGS_FILE" ]; then
        value=$(awk -v k="$key" '
            {
                if (match($0, "\"key\":[[:space:]]*\"" k "\"")) {
                    rest = substr($0, RSTART + RLENGTH)
                    if (match(rest, /"value":[[:space:]]*"[^"]+"/)) {
                        v = substr(rest, RSTART, RLENGTH)
                        sub(/^"value":[[:space:]]*"/, "", v)
                        sub(/"$/, "", v)
                        print v
                        exit
                    }
                }
            }
        ' "$SETTINGS_FILE")
    fi
    if [ "$value" = "null" ] || [ -z "$value" ]; then
        echo "$default_value"
    else
        echo "$value"
    fi
}
# Revert thresholds, read from the agent settings with built-in defaults.
# The values come from a settings file and are later used in `[ -ge ]` / `$(( ))`,
# so anything that is not a plain non-negative integer is replaced by the
# default instead of being allowed to break the arithmetic below.
MAX_ORCH_RESTARTS=$(get_profile_agent_setting_with_default "maxOrchestrationRestartsWithinThreeMin" "10")
case "$MAX_ORCH_RESTARTS" in
    ''|*[!0-9]*) MAX_ORCH_RESTARTS=10 ;;
esac
MAX_AGE_MINUTES=$(get_profile_agent_setting_with_default "upgradeProcessTimeoutMin" "90")
case "$MAX_AGE_MINUTES" in
    ''|*[!0-9]*) MAX_AGE_MINUTES=90 ;;
esac
MAX_AGE_SECONDS=$((MAX_AGE_MINUTES * 60))
# update_orchestrations_counters
#
# Maintains $orch_counters, a space-separated list whose first field is the
# count for the current interval. For every whole $interval_duration that has
# elapsed since $last_update (capped at three) a fresh "0" is pushed in front
# and the oldest field is dropped; $last_update then advances by the elapsed
# whole intervals. Finally the first field is incremented by one.
update_orchestrations_counters()
{
    current_time=$(date +%s)
    elapsed_time=$((current_time - last_update))
    intervals_passed=$((elapsed_time / interval_duration))

    if [ "$intervals_passed" -gt 0 ]; then
        # Shifting more than three times cannot change the outcome.
        if [ "$intervals_passed" -gt 3 ]; then
            shifts=3
        else
            shifts=$intervals_passed
        fi
        case "$shifts" in
            1) orch_counters="0 $(echo "$orch_counters" | cut -d' ' -f1-2)" ;;
            2) orch_counters="0 0 $(echo "$orch_counters" | cut -d' ' -f1)" ;;
            *) orch_counters="0 0 0" ;;
        esac
        last_update=$((last_update + intervals_passed * interval_duration))
    fi

    # Count this call in the current (first) interval.
    first=$(echo "$orch_counters" | cut -d' ' -f1)
    rest=$(echo "$orch_counters" | cut -d' ' -f2-)
    first=$((first + 1))
    orch_counters="$first $rest"
}
get_basename() get_basename()
{ {
is_basename="$(command -v basename)" is_basename="$(command -v basename)"
@ -830,6 +867,16 @@ load_services()
else else
var_service_startup=false var_service_startup=false
fi fi
crashes_revert=$(get_profile_agent_setting_with_default "allowCrashesRevert" "true")
if [ "$crashes_revert" = "true" ] && [ "$(get_basename $service)" = "cp-nano-orchestration" ] && [ -f ${FILESYSTEM_PATH}/revert/upgrade_status ]; then
update_orchestrations_counters
total_orch_restarts=$(echo "$orch_counters" | awk '{print $1 + $2 + $3}')
log "load_services" "orchestrator restart no. ${total_orch_restarts}"
if [ "$total_orch_restarts" -ge "$MAX_ORCH_RESTARTS" ]; then
${SCRIPT_FOLDER}/revert_orchestrator_version.sh ${LOG_FILE_PATH}/$LOG_FILE
fi
fi
run_service $service $gaia_ld_path run_service $service $gaia_ld_path
increment_watchdog_process_restart_counter increment_watchdog_process_restart_counter
echo "running" > $AGENT_RUN_STATUS_FILE echo "running" > $AGENT_RUN_STATUS_FILE
@ -1010,6 +1057,12 @@ else
fi fi
IS_SERVICE_STARTED=false IS_SERVICE_STARTED=false
echo "" >${FILESYSTEM_PATH}/$SRVS_HALTED echo "" >${FILESYSTEM_PATH}/$SRVS_HALTED
# State consumed by update_orchestrations_counters:
# last_update is the epoch second from which elapsed intervals are measured.
last_update=$(date +%s)
# Length of one counting interval, in seconds.
interval_duration=60
# Three space-separated restart counters; the first is the current interval.
orch_counters="0 0 0"
# Number of main-loop passes so far; the file-age revert check runs on every 10th pass.
iteration_count=0
while $(true); do while $(true); do
if [ -z $IS_CONTAINER_ENV ] && [ -f ${FILESYSTEM_PATH}/orchestration/restart_watchdog ]; then if [ -z $IS_CONTAINER_ENV ] && [ -f ${FILESYSTEM_PATH}/orchestration/restart_watchdog ]; then
rm -f ${FILESYSTEM_PATH}/orchestration/restart_watchdog rm -f ${FILESYSTEM_PATH}/orchestration/restart_watchdog
@ -1028,5 +1081,21 @@ while $(true); do
rotate_service_log rotate_service_log
daily_log_files_cleanup daily_log_files_cleanup
# Every 10th pass: if file-age based revert is enabled and the upgrade status
# file is older than MAX_AGE_SECONDS, run the revert script.
iteration_count=$((iteration_count + 1))
if [ $((iteration_count % 10)) -eq 0 ]; then
    # The setting is only needed on the passes that actually perform the check.
    file_age_revert=$(get_profile_agent_setting_with_default "allowFileAgeRevert" "false")
    if [ "$file_age_revert" = "true" ] && [ -f "${FILESYSTEM_PATH}/revert/upgrade_status" ]; then
        file_mtime=$(stat -c %Y "${FILESYSTEM_PATH}/revert/upgrade_status" 2>/dev/null)
        # The file can disappear between the -f test and stat; an empty mtime
        # would otherwise break the arithmetic expansion below.
        if [ -n "$file_mtime" ]; then
            current_time=$(date +%s)
            file_age=$((current_time - file_mtime))
            if [ "$file_age" -gt "$MAX_AGE_SECONDS" ]; then
                log "monitor_upgrade_status_file_age" "The file has existed for more than $MAX_AGE_MINUTES minutes."
                "${SCRIPT_FOLDER}/revert_orchestrator_version.sh" "${LOG_FILE_PATH}/$LOG_FILE"
            fi
        fi
    fi
fi
sleep 5 sleep 5
done done