#!/bin/bash

# Docker service health-check script.
#
# Runs a fixed series of checks against the knowledge_base_* containers
# (MySQL, Redis, Meilisearch, app), the web endpoint on localhost:8000 and
# the ./storage directories, then prints a summary.
#
# Exit status when executed directly:
#   0 - every check passed
#   1 - at least one check failed (or Docker itself is unreachable)
#   2 - no failures, but at least one warning

set -e

# ANSI colour escapes; they are rendered by printf's %b in the log helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers. Each prints a coloured level tag followed by the message
# ($1) on stdout. The message is printed with %s so that backslashes or
# percent signs in it are never interpreted.
log_info() {
  printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}"
}

log_success() {
  printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "${1-}"
}

log_warning() {
  printf '%b[WARNING]%b %s\n' "$YELLOW" "$NC" "${1-}"
}

log_error() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}"
}

# Verify that the Docker daemon is reachable.
# Exits the whole script with status 1 when `docker info` fails, because none
# of the remaining checks can work without Docker.
check_docker() {
  log_info "检查Docker服务状态..."

  if ! docker info >/dev/null 2>&1; then
    log_error "Docker未运行或无法访问"
    exit 1
  fi

  log_success "Docker服务正常运行"
}

# Check that a container is running and report its health status.
# Arguments:
#   $1 - container name as listed by `docker ps`
#   $2 - human-readable service name used in log messages
# Returns:
#   0 - healthy, no health check configured, or unknown status
#   1 - container not running, or reported unhealthy
#   2 - container is still starting
check_container_status() {
  local container_name=$1
  local service_name=$2

  log_info "检查${service_name}容器状态..."

  # Plain name-per-line output (no table header) matched as a fixed,
  # whole-line string, so characters in the name are never read as regex.
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
    log_error "${service_name}容器未运行"
    return 1
  fi

  # Read the container's health status. If the inspect command fails, the
  # container is treated as having no health check configured.
  local health_status
  health_status=$(docker inspect --format='{{.State.Health.Status}}' \
    "$container_name" 2>/dev/null) || health_status="no-healthcheck"

  case "$health_status" in
    "healthy")
      log_success "${service_name}容器健康状态正常"
      return 0
      ;;
    "unhealthy")
      log_error "${service_name}容器健康检查失败"
      return 1
      ;;
    "starting")
      log_warning "${service_name}容器正在启动中..."
      return 2
      ;;
    "no-healthcheck")
      log_warning "${service_name}容器未配置健康检查"
      return 0
      ;;
    *)
      log_warning "${service_name}容器健康状态未知: ${health_status}"
      return 0
      ;;
  esac
}

# Check the MySQL connection by running `mysqladmin ping` inside the
# knowledge_base_mysql container, retrying up to 5 times, 5 seconds apart.
# Returns: 0 on success, 1 when every attempt failed.
check_mysql() {
  log_info "检查MySQL数据库连接..."

  local max_attempts=5
  local attempt=1

  while [ "$attempt" -le "$max_attempts" ]; do
    if docker exec knowledge_base_mysql mysqladmin ping -h localhost --silent 2>/dev/null; then
      log_success "MySQL数据库连接正常"
      return 0
    fi

    log_warning "MySQL连接尝试 ${attempt}/${max_attempts} 失败,等待5秒后重试..."
    sleep 5
    attempt=$((attempt + 1))
  done

  log_error "MySQL数据库连接失败"
  return 1
}

# Check the Redis connection by running `redis-cli ping` inside the
# knowledge_base_redis container and looking for PONG in the reply.
# Returns: 0 on success, 1 otherwise.
check_redis() {
  log_info "检查Redis缓存连接..."

  if docker exec knowledge_base_redis redis-cli ping | grep -q "PONG"; then
    log_success "Redis缓存连接正常"
    return 0
  else
    log_error "Redis缓存连接失败"
    return 1
  fi
}

# Check Meilisearch by requesting its /health endpoint with curl from inside
# the knowledge_base_meilisearch container, retrying up to 3 times, 3 seconds
# apart.
# Returns: 0 on success, 1 when every attempt failed.
check_meilisearch() {
  log_info "检查Meilisearch搜索引擎连接..."

  local max_attempts=3
  local attempt=1

  while [ "$attempt" -le "$max_attempts" ]; do
    if docker exec knowledge_base_meilisearch curl -f http://localhost:7700/health >/dev/null 2>&1; then
      log_success "Meilisearch搜索引擎连接正常"
      return 0
    fi

    log_warning "Meilisearch连接尝试 ${attempt}/${max_attempts} 失败,等待3秒后重试..."
    sleep 3
    attempt=$((attempt + 1))
  done

  log_error "Meilisearch搜索引擎连接失败"
  return 1
}

# Check the web application on http://localhost:8000, retrying up to 3 times,
# 5 seconds apart.
# Returns:
#   0 - /health answered 200, or the root path answered 200
#   1 - every attempt failed
#   2 - /health answered 503 (application up, some dependency unavailable)
check_web_app() {
  log_info "检查Web应用健康状态..."

  local max_attempts=3
  local attempt=1
  local response root_response

  while [ "$attempt" -le "$max_attempts" ]; do
    # curl's -w already prints "000" when no HTTP response was received, so
    # "000" is substituted only when curl produced no output at all.
    # --max-time keeps an unresponsive endpoint from hanging the check.
    response=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 \
      http://localhost:8000/health 2>/dev/null) || true
    response=${response:-000}

    if [ "$response" = "200" ]; then
      log_success "Web应用健康检查通过"
      return 0
    elif [ "$response" = "503" ]; then
      log_warning "Web应用部分服务不可用,但应用仍在运行"
      return 2
    fi

    # Fall back to the root path in case there is no dedicated health route.
    root_response=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 \
      http://localhost:8000/ 2>/dev/null) || true
    root_response=${root_response:-000}

    if [ "$root_response" = "200" ]; then
      log_success "Web应用根路径访问正常"
      return 0
    fi

    log_warning "Web应用健康检查尝试 ${attempt}/${max_attempts} 失败 (HTTP: ${response}),等待5秒后重试..."
    sleep 5
    attempt=$((attempt + 1))
  done

  log_error "Web应用健康检查失败"
  return 1
}

# Check that a process whose command line matches "queue:work" is running
# inside the knowledge_base_app container.
# Returns: 0 when found, 1 otherwise.
check_queue_worker() {
  log_info "检查队列处理器状态..."

  if docker exec knowledge_base_app pgrep -f "queue:work" >/dev/null 2>&1; then
    log_success "队列处理器正常运行"
    return 0
  else
    log_error "队列处理器进程未运行"
    return 1
  fi
}

# Check that every storage directory exists and is writable. Paths are
# relative to the current working directory.
# Returns: 0 when all directories are fine, 1 otherwise.
check_data_persistence() {
  log_info "检查数据持久化状态..."

  local errors=0
  local dir
  local storage_dirs=(
    "./storage/mysql"
    "./storage/redis"
    "./storage/meilisearch"
    "./storage/app"
  )

  for dir in "${storage_dirs[@]}"; do
    # Counters use $(( )) assignment: a bare ((errors++)) returns status 1
    # when the old value is 0, which would abort the script under `set -e`.
    if [ ! -d "$dir" ]; then
      log_error "存储目录不存在: $dir"
      errors=$((errors + 1))
    elif [ ! -w "$dir" ]; then
      log_error "存储目录不可写: $dir"
      errors=$((errors + 1))
    fi
  done

  if [ "$errors" -eq 0 ]; then
    log_success "数据持久化配置正常"
    return 0
  else
    log_error "发现 $errors 个数据持久化问题"
    return 1
  fi
}

# Run every check, tally the results and exit with the overall status
# (0 = all passed, 1 = failures, 2 = warnings only).
main() {
  echo "========================================"
  echo "Docker服务健康检查开始"
  echo "时间: $(date)"
  echo "========================================"

  local total_checks=0
  local failed_checks=0
  local warning_checks=0

  # Each entry is "function[:arg...]"; the fields after the function name
  # are passed to it as positional arguments.
  local -a checks=(
    "check_docker:Docker服务"
    "check_container_status:knowledge_base_mysql:MySQL容器"
    "check_container_status:knowledge_base_redis:Redis容器"
    "check_container_status:knowledge_base_meilisearch:Meilisearch容器"
    "check_container_status:knowledge_base_app:应用容器"
    "check_mysql:MySQL连接"
    "check_redis:Redis连接"
    "check_meilisearch:Meilisearch连接"
    "check_web_app:Web应用"
    "check_queue_worker:队列处理器"
    "check_data_persistence:数据持久化"
  )

  local check check_func result
  local -a check_parts check_args

  for check in "${checks[@]}"; do
    IFS=':' read -ra check_parts <<< "$check"
    check_func=${check_parts[0]}
    check_args=("${check_parts[@]:1}")

    total_checks=$((total_checks + 1))

    # Capture the status with `||` so that a failing check is counted
    # instead of terminating the script through `set -e`.
    result=0
    "$check_func" "${check_args[@]}" || result=$?

    case "$result" in
      0) ;;
      2) warning_checks=$((warning_checks + 1)) ;;
      # 1 and any unexpected status (e.g. 127) count as a failure.
      *) failed_checks=$((failed_checks + 1)) ;;
    esac

    echo ""
  done

  # Print the summary.
  echo "========================================"
  echo "健康检查完成"
  echo "总检查项: $total_checks"
  echo "失败: $failed_checks"
  echo "警告: $warning_checks"
  echo "成功: $((total_checks - failed_checks - warning_checks))"
  echo "========================================"

  if [ "$failed_checks" -gt 0 ]; then
    log_error "发现 $failed_checks 个严重问题,请检查服务状态"
    exit 1
  elif [ "$warning_checks" -gt 0 ]; then
    log_warning "发现 $warning_checks 个警告,服务可能需要关注"
    exit 2
  else
    log_success "所有服务健康检查通过"
    exit 0
  fi
}

# Run main only when the script is executed directly, not when sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi