[Misc] Auto-size Minikube memory via calculate_safe_memory (#637) #5
name: Router E2E tests
on:
  push:
    branches:
      - main
    paths:
      - 'src/vllm_router/**'
      - 'docker/**'
      - 'pyproject.toml'
      - '.github/**'
  pull_request:
    paths:
      - 'src/vllm_router/**'
      - 'docker/**'
      - 'pyproject.toml'
      - '.github/**'
  workflow_dispatch:
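# Three jobs: a hosted router E2E/perf job, followed by two self-hosted
# discovery E2E jobs that only run after it succeeds.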
jobs:
  e2e-test:
    runs-on: ubuntu-latest
    if: github.event.pull_request.draft == false
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r src/tests/requirements.txt
          pip install -r requirements-test.txt
          pip install -e .
      - name: Make scripts executable
        run: |
          chmod +x src/vllm_router/perf-test.sh
          chmod +x src/tests/perftest/*.sh
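      # Launch mock OpenAI-compatible servers via run-multi-server.sh; the
      # positional arguments (4 and 500) are interpreted by the script
      # (assumed: number of servers and a per-server setting).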
      - name: Start Mock OpenAI servers
        working-directory: src/tests/perftest
        run: |
          bash run-multi-server.sh 4 500
          sleep 10
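      # Start the router in the background on port 8000 against the mock
      # backends; the sleep gives it a few seconds to come up.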
      - name: Start Router for Testing
        run: |
          bash src/vllm_router/perf-test.sh 8000 &
          sleep 5
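      # Drive load against the router with request_generator.py
      # (10 QPS, 32 workers, 300 s) and keep the full log as an artifact.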
      - name: Run Performance tests
        working-directory: src/tests/perftest
        env:
          PYTHONPATH: ${{ github.workspace }}
        run: |
          echo "PYTHONPATH=$PYTHONPATH"
          mkdir -p logs
          if [ ! -f "request_generator.py" ]; then
            echo "Error: request_generator.py not found!"
            exit 1
          fi
          # Run with Python's verbose flag (-v traces imports) to aid debugging
          python3 -v request_generator.py --qps 10 --num-workers 32 --duration 300 2>&1 | tee logs/request_generator.log
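      # Run the pytest E2E suite under coverage, scoped to src/vllm_router,
      # and save a text report for upload.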
      - name: Run E2E Tests with Coverage
        run: |
          pip install coverage
          coverage run --source=src/vllm_router -m pytest src/tests/test_*.py
          coverage report -m > coverage.txt
      - name: Cleanup Test Environment
        if: always()
        working-directory: src/tests/perftest
        run: |
          bash clean-up.sh
      - name: Upload Coverage Report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: coverage.txt
      - name: Upload Test logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-logs-pr-${{ github.event.pull_request.number || 'manual' }}
          path: |
            ~/.kube/config
            src/tests/perftest/logs
  k8s-discovery-e2e-test:
    runs-on: self-hosted
    needs: e2e-test
    if: github.event.pull_request.draft == false
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Install Python dependencies
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          python -m pip install --upgrade pip
          pip install -r benchmarks/multi-round-qa/requirements.txt
          pip install -e .
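      # fs.protected_regular=0 relaxes the kernel's protection on regular files
      # in sticky-bit directories such as /tmp; assumed to be a workaround for
      # minikube/kubectl writing files there on this self-hosted runner.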
      - name: Setup minikube environment
        env:
          DOCKER_BUILDKIT: 1
        run: |
          echo "🔧 Setting up minikube environment"
          sudo sysctl fs.protected_regular=0
          # Verify minikube is running
          minikube status
          # Ensure kubectl is configured for minikube
          kubectl config use-context minikube
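      # Build the router image inside minikube's Docker daemon (via
      # `minikube docker-env`) so the cluster can use the image without
      # pushing it to a registry.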
      - name: Build and deploy router image
        env:
          DOCKER_BUILDKIT: 1
        run: |
          echo "🔨 Building router docker image"
          cd ${{ github.workspace }}
          eval "$(minikube docker-env)"
          docker build --build-arg INSTALL_OPTIONAL_DEP=default -t git-act-router -f docker/Dockerfile.kvaware .
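      # Run every k8s service-discovery routing scenario; the flag semantics
      # (--num-requests, --chunk-size, --timeout units) are defined by
      # run-k8s-routing-test.sh.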
      - name: Run all k8s discovery routing tests
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          echo "🧪 Running all k8s discovery routing tests"
          ./tests/e2e/run-k8s-routing-test.sh all \
            --model "facebook/opt-125m" \
            --num-requests 25 \
            --chunk-size 128 \
            --verbose \
            --result-dir /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }} \
            --timeout 10
        timeout-minutes: 10
      - name: Archive k8s discovery routing test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: k8s-discovery-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
          path: |
            /tmp/k8s-discovery-routing-results-pr-${{ github.event.pull_request.number || 'main' }}/*
      - run: echo "🍏 K8s discovery e2e test job status is ${{ job.status }}."
  static-discovery-e2e-test:
    runs-on: self-hosted
    needs: e2e-test
    if: github.event.pull_request.draft == false
    env:
      LOG_DIR: /tmp/static-discovery-e2e-test-${{ github.event.pull_request.number || 'main' }}
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Install Python dependencies
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          python -m pip install --upgrade pip
          pip install -e .
      - name: Install vLLM and lmcache
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          pip install vllm
          pip install lmcache
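      # Pin one backend per GPU with CUDA_VISIBLE_DEVICES; both serve
      # facebook/opt-125m on ports 8001 and 8002 and log to $LOG_DIR.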
      - name: Start 2 vLLM serve backends
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          echo "🚀 Starting vLLM serve backends"
          mkdir -p "$LOG_DIR"
          CUDA_VISIBLE_DEVICES=0 vllm serve facebook/opt-125m --port 8001 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend1.log" 2>&1 &
          CUDA_VISIBLE_DEVICES=1 vllm serve facebook/opt-125m --port 8002 --gpu-memory-utilization 0.7 --chat-template .github/template-chatml.jinja > "$LOG_DIR/backend2.log" 2>&1 &
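      # Poll both backends until they respond (the first argument, 180, is
      # assumed to be the timeout in seconds used by wait-for-backends.sh).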
      - name: Wait for backends to be ready
        run: |
          echo "⏳ Waiting for backends to be ready"
          chmod +x tests/e2e/wait-for-backends.sh
          ./tests/e2e/wait-for-backends.sh 180 "http://localhost:8001" "http://localhost:8002"
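      # Run the static-discovery routing scenarios against the two local
      # backends; option semantics are defined by
      # run-static-discovery-routing-test.sh.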
      - name: Run All Static Discovery Routing Tests
        run: |
          source /usr/local/bin/conda-init
          conda activate llmstack
          echo "🧪 Running all static discovery routing tests sequentially"
          chmod +x tests/e2e/run-static-discovery-routing-test.sh
          ./tests/e2e/run-static-discovery-routing-test.sh all \
            --pythonpath "$PYTHONPATH" \
            --log-dir "$LOG_DIR" \
            --num-requests 20 \
            --verbose \
            --backends-url "http://localhost:8001,http://localhost:8002"
        timeout-minutes: 5
      - name: Archive static discovery test results and logs
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }}
          path: |
            ${{ env.LOG_DIR }}/*
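      # Best-effort cleanup: kill any leftover vLLM backends and router
      # processes even if earlier steps failed (|| true keeps the step green).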
      - name: Cleanup processes
        if: always()
        run: |
          echo "🧹 Cleaning up processes"
          pkill -f "vllm serve" || true
          pkill -f "python3 -m src.vllm_router.app" || true
      - run: echo "🍏 Static discovery e2e test job status is ${{ job.status }}."