# Skip to content
#
# (MAINT) test vmpooler #1026
#
# (MAINT) test vmpooler
#
# (MAINT) test vmpooler #1026

---
# Workflow that installs PE on a Docker-provisioned test cluster.
name: Install test matrix

on:
  # Pull requests against main that touch module code, specs, fixtures,
  # tooling configuration, or the workflow definitions themselves.
  pull_request:
    branches:
      - main
    paths:
      - '.github/workflows/**/*'
      - 'spec/**/*'
      - 'lib/**/*'
      - 'tasks/**/*'
      - 'functions/**/*'
      - 'types/**/*'
      - 'plans/**/*'
      - 'hiera/**/*'
      - 'manifests/**/*'
      - 'templates/**/*'
      - 'files/**/*'
      - 'metadata.json'
      - 'Rakefile'
      - 'Gemfile'
      - 'provision.yaml'
      - '.rspec'
      - '.rubocop.yml'
      - '.puppet-lint.rc'
      - '.fixtures.yml'
  # Manual trigger; no inputs are defined.
  workflow_dispatch: {}
jobs:
  # One run per combination of the matrix values below; the display name is
  # built from those values.
  test-install:
    name: PE ${{ matrix.version }} ${{ matrix.architecture }} on ${{ matrix.image }}
    runs-on: ubuntu-latest
    env:
      # Quoted so the values are unambiguously strings.
      BOLT_GEM: 'true'
      BOLT_DISABLE_ANALYTICS: 'true'
      LANG: 'en_US.UTF-8'
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        architecture:
          - 'standard-with-dr'
        version:
          - '2025.6.0'
        image:
          - 'litmusimage/ubuntu:24.04'
    steps:
# Check out the repository contents into the runner workspace.
- name: 'Checkout Source'
  uses: 'actions/checkout@v4'
# Set up Ruby 3.1 with the setup action's bundler cache switched on.
- name: 'Activate Ruby 3.1'
  uses: 'ruby/setup-ruby@v1'
  with:
    bundler-cache: true
    # Quoted: an unquoted 3.1 would be read as a float.
    ruby-version: '3.1'
# Diagnostic only: dump `bundle env` inside a collapsible log group.
# Skipped unless the repository owner is puppetlabs.
- name: Print bundle environment
  if: github.repository_owner == 'puppetlabs'
  run: |
    echo "::group::info:bundler"
    bundle env
    echo "::endgroup::"
# Provision the test nodes with the peadm_spec plan, then overwrite
# ./inventory.yaml with a version 2 inventory whose targets are reached
# through Bolt's Docker transport. Fails fast when the running containers
# cannot satisfy the requested architecture, instead of writing an inventory
# with no targets.
- name: Provision test cluster
  timeout-minutes: 15
  env:
    # Matrix values reach the script as environment variables rather than
    # being pasted into the script text, so the shell always treats them as
    # data.
    MATRIX_IMAGE: ${{ matrix.image }}
    MATRIX_ARCHITECTURE: ${{ matrix.architecture }}
  run: |
    echo ::group::prepare
    # Relaxed SSH client defaults for throwaway test nodes.
    mkdir -p "$HOME/.ssh"
    cat > "$HOME/.ssh/config" << 'EOF'
    Host *
      ServerAliveInterval 150
      ServerAliveCountMax 2
      StrictHostKeyChecking no
      UserKnownHostsFile /dev/null
      ConnectTimeout 30
      ConnectionAttempts 10
    EOF
    bundle exec rake spec_prep
    echo ::endgroup::

    echo ::group::provision
    bundle exec bolt plan run peadm_spec::provision_test_cluster \
      --modulepath spec/fixtures/modules \
      provider=docker \
      image="$MATRIX_IMAGE" \
      architecture="$MATRIX_ARCHITECTURE" \
      --log-level trace
    echo ::endgroup::

    echo ::group::list modules
    ls -l ./spec/fixtures/modules || true; echo
    echo ::endgroup::

    echo ::group::show original inventory
    echo "=== Original inventory.yaml ==="
    cat ./inventory.yaml || echo "No inventory.yaml found"
    echo ::endgroup::

    echo ::group::create docker transport inventory
    echo "=== Creating Docker transport inventory ==="
    # Names of the running containers, in the order `docker ps` prints them.
    # Without "table" in the format there is no header row to filter out.
    mapfile -t containers < <(docker ps --format '{{.Names}}')
    container_count=${#containers[@]}

    # Append one target to the inventory.
    #   $1 - container name (used as target name, uri and container)
    #   $2 - value of the target's "role" var
    # NOTE(review): the nesting below was reconstructed (the original
    # whitespace was lost) - confirm that "container" and "tmpdir" belong
    # under "docker:".
    add_target() {
      {
        echo "      - name: $1"
        echo "        uri: $1"
        echo "        vars:"
        echo "          role: $2"
        echo "        config:"
        echo "          transport: docker"
        echo "          docker:"
        echo "            container: $1"
        echo "            tmpdir: /var/tmp"
      } >> ./inventory.yaml
    }

    # Start a fresh version 2 inventory; targets are appended below.
    cat > ./inventory.yaml << 'EOF'
    version: 2
    groups:
      - name: pe_nodes
        targets:
    EOF

    # Assign roles based on architecture. Roles follow `docker ps` order:
    # the first listed container becomes the primary.
    case "$MATRIX_ARCHITECTURE" in
      "standard-with-dr")
        if [ "$container_count" -lt 2 ]; then
          echo "::error::standard-with-dr needs at least 2 running containers, found $container_count"
          docker ps
          exit 1
        fi
        add_target "${containers[0]}" primary
        add_target "${containers[1]}" replica
        ;;
      # ... other architecture cases ...
      *)
        # An architecture with no case would otherwise leave the group empty.
        echo "::error::No inventory layout defined for architecture '$MATRIX_ARCHITECTURE'"
        exit 1
        ;;
    esac

    echo "=== New Docker transport inventory.yaml ==="
    cat ./inventory.yaml
    echo ::endgroup::

    echo ::group::info:request
    cat request.json || true; echo
    echo ::endgroup::
# Diagnostics for every running container: SSH service state, recent SSH
# logs, root account status, authorized keys and a basic exec check.
# Side effect: it also sets the root password to "root" in each container.
# Every probe has a fallback message, so a failing probe does not fail the step.
- name: Debug container setup
  run: |
    echo "::group::debug_container_setup"
    mapfile -t nodes < <(docker ps --format '{{.Names}}')
    for node in "${nodes[@]}"; do
      echo "=== Container: $node ==="

      # Check if SSH service is actually running
      echo "SSH service status:"
      docker exec "$node" systemctl is-active ssh \
        || docker exec "$node" service ssh status \
        || echo "SSH service check failed"

      # Check SSH daemon logs
      echo "SSH daemon logs:"
      docker exec "$node" journalctl -u ssh --no-pager -n 10 \
        || docker exec "$node" tail -10 /var/log/auth.log \
        || echo "No SSH logs found"

      # Check root password is set
      echo "Root account status:"
      docker exec "$node" passwd -S root || echo "Cannot check root account"

      # Try to manually set root password
      echo "Setting root password to 'root':"
      docker exec "$node" bash -c 'echo "root:root" | chpasswd' \
        || echo "Failed to set root password"

      # Check authorized_keys
      echo "Authorized keys:"
      docker exec "$node" cat /root/.ssh/authorized_keys 2>/dev/null \
        || echo "No authorized_keys file"

      # Check if we can exec into container
      echo "Test container exec:"
      docker exec "$node" whoami || echo "Cannot exec into container"
    done
    echo "::endgroup::"
# Diagnostics: install sshpass on the runner, then for each running container
# show its sshd processes, IP address and published SSH port, attempt a
# root/root password login through localhost, and print the relevant
# sshd_config settings. Failed probes print a message instead of failing.
- name: Debug SSH connectivity
  run: |
    echo "::group::debug_ssh_connectivity"
    # Install sshpass for password authentication
    sudo apt-get update && sudo apt-get install -y sshpass

    # Check if containers are running
    docker ps

    # Check SSH processes in containers
    mapfile -t nodes < <(docker ps --format '{{.Names}}')
    for node in "${nodes[@]}"; do
      echo "Testing SSH to container: $node"
      echo "SSH processes in $node:"
      docker exec "$node" ps aux | grep sshd || echo "No sshd processes found in $node"

      # Test SSH connectivity directly with password
      echo "Testing direct SSH connection to $node:"
      container_ip=$(docker inspect "$node" | jq -r '.[0].NetworkSettings.IPAddress')
      echo "Container IP: $container_ip"

      # Get the mapped SSH port
      ssh_port=$(docker port "$node" 22 | cut -d: -f2)
      echo "SSH port mapping: localhost:$ssh_port -> $node:22"

      # Test SSH connection with sshpass and root password
      echo "Testing SSH with sshpass and root/root:"
      remote_cmd="echo \"SSH connection successful to $node\""
      timeout 10 sshpass -p "root" ssh \
        -o StrictHostKeyChecking=no \
        -o ConnectTimeout=5 \
        -p $ssh_port root@localhost "$remote_cmd" \
        || echo "SSH connection failed to $node"

      # Also check what's actually listening on the SSH port
      echo "Checking what's listening on port $ssh_port:"
      netstat -ln | grep ":$ssh_port " || echo "Nothing listening on port $ssh_port"

      # Check SSH config inside container
      echo "SSH configuration in $node:"
      docker exec "$node" cat /etc/ssh/sshd_config \
        | grep -E "(PasswordAuthentication|PermitRootLogin|PubkeyAuthentication)" || true

      # Check if root account is set up correctly
      echo "Root account info in $node:"
      docker exec "$node" passwd -S root || true
    done
    echo "::endgroup::"
# Poll until Bolt can run a command on every target in the pe_nodes group of
# ./inventory.yaml. Up to 12 attempts, 5 seconds apart; after the last failed
# attempt the inventory and `docker ps` are printed and the step exits 1.
- name: Wait for Docker connectivity to be ready
  run: |
    echo "::group::wait_for_docker"
    # Wait for Docker connectivity to be available on all containers via Bolt
    max_attempts=12
    attempt=0
    while true; do
      attempt=$((attempt + 1))
      echo "Attempt $attempt: Testing Bolt Docker connectivity..."

      # Test Docker transport connectivity
      if bundle exec bolt command run 'echo "Bolt Docker test successful"' \
        --inventoryfile ./inventory.yaml \
        --targets pe_nodes; then
        echo "All containers are accessible via Bolt Docker transport!"
        break
      fi

      # Out of attempts: dump state for debugging and fail the step.
      if [ "$attempt" -eq "$max_attempts" ]; then
        echo "Containers failed to become accessible after $max_attempts attempts"
        echo "Final inventory check:"
        cat ./inventory.yaml
        echo "=== Docker container status ==="
        docker ps
        exit 1
      fi

      echo "Waiting 5 seconds before retry..."
      sleep 5
    done
    echo "::endgroup::"
# For each running container, print the authentication-related sshd_config
# settings and try a short list of common root passwords over the published
# SSH port. Uses sshpass, which the earlier "Debug SSH connectivity" step
# installs on the runner.
- name: Check container SSH configuration
  run: |
    echo ::group::container_ssh_config
    for container in $(docker ps --format "table {{.Names}}" | tail -n +2); do
      echo "=== SSH config for $container ==="
      docker exec "$container" cat /etc/ssh/sshd_config | grep -E "(PasswordAuthentication|PubkeyAuthentication|PermitRootLogin)" || true
      echo "=== Test password auth ==="
      ssh_port=$(docker port "$container" 22 | cut -d: -f2)
      # Try common passwords; stop at the first one that logs in.
      for password in "root" "password" "litmus"; do
        echo "Trying password: $password"
        # The remote command is double-quoted so $password is expanded here on
        # the runner. Single-quoted, it would be expanded by the remote shell,
        # where the variable is unset, and the message would omit the password.
        timeout 5 sshpass -p "$password" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -p $ssh_port root@localhost "echo 'Password auth successful with: $password'" && break
      done || echo "Password authentication failed"
    done
    echo ::endgroup::
# Prepare every running container: install a fixed package list, verify the
# required commands, make sure /tmp and /var/tmp exist with mode 1777, create
# a few directories, add a hosts entry for the container's hostname, and run
# systemd and GPG checks. Commands with an `|| echo` fallback only report a
# failure; the bare mkdir/chmod/ls commands have no fallback.
- name: Prepare containers for Bolt tasks
  run: |
    echo "::group::prepare_containers"

    # Run a command inside the container currently being prepared.
    in_node() {
      docker exec "$node" "$@"
    }

    # Install required packages for Ubuntu 24.04
    packages=(
      curl wget ca-certificates hostname procps util-linux systemd
      openssh-server openssh-client tar gzip net-tools iproute2 file
      gettext-base findutils coreutils grep sed gawk bash systemd-resolved
      dnsutils iputils-ping gnupg gnupg2 gpg-agent apt-utils lsb-release
      software-properties-common
    )

    mapfile -t nodes < <(docker ps --format "{{.Names}}" | grep -v NAMES)
    for node in "${nodes[@]}"; do
      echo "=== Preparing container: $node ==="

      echo "Installing required packages:"
      in_node apt-get update || echo "apt-get update failed"
      in_node apt-get install -y "${packages[@]}" || echo "Package installation failed"

      # Ensure critical commands are available
      echo "Testing required commands:"
      in_node hostname || echo "hostname command failed"
      in_node hostname -f || echo "hostname -f command failed"
      in_node file --version || echo "file command failed"
      in_node tar --version || echo "tar command failed"
      for tool in systemctl gpg apt-key; do
        in_node which "$tool" || echo "$tool not found"
      done

      # Ensure /tmp and /var/tmp exist and have correct permissions
      for scratch in /tmp /var/tmp; do
        echo "Creating and setting permissions for $scratch directory:"
        in_node mkdir -p "$scratch"
        in_node chmod 1777 "$scratch"
        in_node ls -ld "$scratch"
      done

      # Create a test file to verify write permissions
      echo "Testing write permissions:"
      in_node touch /var/tmp/test_write_permissions || echo "Failed to create test file"
      in_node rm -f /var/tmp/test_write_permissions || echo "Failed to remove test file"

      # Ensure other required directories exist
      echo "Creating other required directories:"
      for dir in /var/log /etc/puppetlabs /opt/puppetlabs /etc/systemd/system /etc/apt/trusted.gpg.d; do
        in_node mkdir -p "$dir"
      done

      # Set up proper hostname resolution
      echo "Setting up hostname resolution:"
      node_hostname=$(in_node hostname 2>/dev/null || echo "localhost")
      in_node bash -c "echo '127.0.0.1 $node_hostname localhost' >> /etc/hosts"

      # Initialize systemd properly (important for PE installer)
      echo "Ensuring systemd is working:"
      in_node systemctl daemon-reload || echo "systemctl daemon-reload failed"

      # Check available disk space
      echo "Disk space check:"
      in_node df -h /var/tmp || echo "Cannot check disk space"

      # Test pe_install.sh specific requirements ("tool|message if missing")
      echo "Testing pe_install.sh requirements:"
      for requirement in \
        "file|file command missing - PE installer will fail" \
        "tar|tar command missing" \
        "systemctl|systemctl missing" \
        "bash|bash missing" \
        "gpg|gpg missing - apt-key will fail" \
        "apt-key|apt-key missing"; do
        in_node which "${requirement%%|*}" || echo "${requirement#*|}"
      done

      # Test if systemd is working properly
      echo "Testing systemd functionality:"
      in_node systemctl --version || echo "systemctl not working"
      in_node systemctl daemon-reload || echo "daemon-reload failed"

      # Test GPG functionality
      echo "Testing GPG functionality:"
      in_node gpg --version || echo "gpg not working"
      in_node apt-key list || echo "apt-key not working"
    done
    echo "::endgroup::"
# Run the peadm_spec install plan against the pe_nodes group of the
# Docker-transport inventory.
- name: Install PE on test cluster
  timeout-minutes: 120
  env:
    # The secret and the matrix values are passed through the environment and
    # quoted in the script. Interpolating ${{ ... }} directly into the script
    # text would let a password containing spaces or shell metacharacters be
    # split or interpreted by the shell.
    CONSOLE_PASSWORD: ${{ secrets.CONSOLE_PASSWORD }}
    MATRIX_ARCHITECTURE: ${{ matrix.architecture }}
    MATRIX_VERSION: ${{ matrix.version }}
  run: |
    echo "=== Starting PE installation with Docker transport ==="
    echo "Using inventory file:"
    cat ./inventory.yaml
    bundle exec bolt plan run peadm_spec::install_test_cluster \
      --inventoryfile ./inventory.yaml \
      --modulepath spec/fixtures/modules \
      architecture="$MATRIX_ARCHITECTURE" \
      version="$MATRIX_VERSION" \
      console_password="$CONSOLE_PASSWORD" \
      --targets pe_nodes \
      --verbose
# Only after a failed step: print bolt-debug.log from the working directory,
# reporting separately when the file is missing, empty or unreadable.
- name: output value of bolt-debug.log
  if: ${{ failure() }}
  run: |
    echo ::group::bolt-debug-log
    if [ ! -f bolt-debug.log ]; then
      echo "No bolt-debug.log file found"
    elif [ ! -s bolt-debug.log ]; then
      # `cat` succeeds on an empty file, so emptiness is tested explicitly.
      echo "bolt-debug.log is empty"
    else
      echo "=== bolt-debug.log contents ==="
      cat bolt-debug.log || echo "Could not read bolt-debug.log"
    fi
    echo ::endgroup::
# Always runs, and its own failure never fails the job. Does nothing unless
# spec/fixtures/litmus_inventory.yaml exists; otherwise runs the litmus
# tear-down rake task and prints request.json if present.
- name: Tear down test cluster
  if: always()
  continue-on-error: true
  run: |-
    # Guard: leave successfully when there is no litmus inventory file.
    [ -f spec/fixtures/litmus_inventory.yaml ] || exit 0

    echo "::group::tear_down"
    bundle exec rake 'litmus:tear_down'
    echo "::endgroup::"

    echo "::group::info:request"
    cat request.json || true; echo
    echo "::endgroup::"