最近访客
详情
评论
问答

Linux 系统-恶意爬虫自动封禁 – 宝塔+CDN厂商同步规则 – 本站使用华为云CDN

一、升级后脚本的核心新增功能

  1. IP 同步:识别的恶意 IP 自动添加到华为云 CDN IP 黑名单
  2. UA 同步:恶意 User-Agent 自动添加到华为云 CDN UA 黑名单
  3. 规则清理:脚本清理宝塔过期封禁规则;CDN 过期规则由平台自动清理(脚本仅记录日志)
  4. 异常重试:CDN IP 黑名单接口调用失败后,等待 3 秒自动重试一次

二、完整升级脚本(直接复制使用)

#!/bin/bash
# Automatic malicious-crawler banning script for a BaoTa panel host fronted by
# Huawei Cloud CDN (bans are mirrored to the CDN).
# ===================== Step 1: settings (edit before use!) =====================
# Access log of the protected site
LOG_FILE='/www/wwwlogs/你的域名.access.log'
# Ban threshold: number of requests within one minute
BAN_THRESHOLD=50
# Ban duration, in days
BAN_DAYS=7
# Run log written by this script
LOG_SAVE='/www/wwwlogs/anti_crawler.log'
# BaoTa firewall configuration file
BT_FIREWALL='/www/server/panel/config/firewall.json'
# The CDN section below uses Huawei Cloud CDN as the example vendor.
# ========== Huawei Cloud CDN settings (fill in your own values!) ==========
# AccessKey ID, obtained from the Huawei Cloud console
AK='你的AccessKey ID'
# AccessKey Secret, obtained from the Huawei Cloud console
SK='你的AccessKey Secret'
# Region identifier (for example cn-north-1 or cn-south-1; see the console)
REGION='cn-north-1'
# Domain accelerated by the CDN (your own domain)
CDN_DOMAIN='填写你的域名'
# CDN service ID; optional, looked up automatically when left empty
CDN_SERVICE_ID=''

# ===================== Step 2: helper functions =====================
# Build the value of the Authorization header for a CDN API request.
# Globals:   AK, SK, REGION (read)
# Arguments: $1 - HTTP method
#            $2 - request URI path
#            $3 - query string (may be empty)
#            $4 - optional timestamp (format %Y-%m-%dT%H:%M:%SZ, UTC). Pass the
#                 same value that is sent in the X-Sdk-Date header so the signed
#                 time and the header cannot drift apart; defaults to "now".
# Outputs:   the header value on stdout
# NOTE(review): the string-to-sign layout, the "Credential=" form and the
# base64-encoded signature are kept exactly as before; they should be checked
# against Huawei Cloud's official AK/SK request-signing documentation.
function hws_sign() {
    local method=$1
    local uri=$2
    local params=$3
    local timestamp=${4:-}
    if [ -z "${timestamp}" ]; then
        timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    fi

    # First 8 characters of the timestamp (e.g. "2026-02-").
    # NOTE(review): presumably intended to be a date stamp - confirm.
    local scope="${AK}/${timestamp:0:8}/${REGION}/cdn/sdk_request"
    local signed_headers="content-type;x-sdk-date"

    # SHA-256 digest of the request description
    local string_to_sign="${method}\n${uri}\n${params}\ncontent-type:application/json\nx-sdk-date:${timestamp}\n"
    local digest
    digest=$(echo -e "${string_to_sign}" | sha256sum | awk '{print $1}')

    # HMAC-SHA256 over the credential scope, signed headers and digest, keyed with SK
    local signature
    signature=$(echo -en "SDK-HMAC-SHA256\nCredential=${scope}\nSignedHeaders=${signed_headers}\nSignature=${digest}" | openssl dgst -sha256 -hmac "${SK}" -binary | base64)
    echo "SDK-HMAC-SHA256 Credential=${scope}, SignedHeaders=${signed_headers}, Signature=${signature}"
}

# Resolve the CDN service ID used in the access-control API paths.
# Globals:   CDN_SERVICE_ID (read; set after a successful lookup),
#            CDN_DOMAIN, REGION, LOG_SAVE (read)
# Outputs:   the service ID on stdout; nothing when the lookup fails
# Returns:   0 when an ID is available, 1 when the lookup produced none
# Note: when this function is invoked through command substitution it runs in
# a subshell, so the CDN_SERVICE_ID assignment is not visible to the caller;
# set CDN_SERVICE_ID in the configuration to avoid repeated lookups.
# NOTE(review): the endpoint and response schema should be confirmed against
# the Huawei Cloud CDN API reference.
function get_cdn_service_id() {
    if [ -n "${CDN_SERVICE_ID}" ]; then
        echo "${CDN_SERVICE_ID}"
        return 0
    fi

    local timestamp query signature response
    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    # Sign the same query string that is actually sent, with the same timestamp
    # that goes into the X-Sdk-Date header.
    query="domain_name=${CDN_DOMAIN}"
    signature=$(hws_sign "GET" "/v1.0/cdn/domains" "${query}" "${timestamp}")

    # Timeouts keep a stalled API call from hanging the scheduled job.
    response=$(curl -s --connect-timeout 10 --max-time 30 -X GET "https://cdn.${REGION}.myhuaweicloud.com/v1.0/cdn/domains?${query}" \
        -H "Content-Type: application/json" \
        -H "X-Sdk-Date: ${timestamp}" \
        -H "Authorization: ${signature}")

    # Take the first "id" value; tolerate optional whitespace around the colon.
    CDN_SERVICE_ID=$(echo "${response}" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | cut -d'"' -f4)
    if [ -z "${CDN_SERVICE_ID}" ]; then
        echo "[$(date +%Y-%m-%d\ %H:%M:%S)] 获取CDN服务ID失败:${response}" >> "${LOG_SAVE:-/dev/stderr}"
        return 1
    fi
    echo "${CDN_SERVICE_ID}"
}

# Add one IP address to the CDN IP blacklist, retrying once on failure.
# Globals:   BAN_DAYS, REGION, LOG_SAVE (read)
# Arguments: $1 - IP address to ban
# Outputs:   progress lines appended to $LOG_SAVE; nothing on stdout
# Returns:   0 when the API reported success, 1 otherwise
# NOTE(review): the endpoint path, payload fields and the '"code":"0"' success
# marker should be confirmed against the Huawei Cloud CDN API reference.
function add_cdn_ip_blacklist() {
    local ip=$1
    local max_attempts=2
    local timestamp service_id request_body signature response api_path attempt

    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    service_id=$(get_cdn_service_id)
    # Without a service ID the URL would be malformed, so do not call the API.
    if [ -z "${service_id}" ]; then
        echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN封禁IP ${ip}失败:无法获取CDN服务ID" >> "$LOG_SAVE"
        return 1
    fi

    # expire_time is the ban duration converted from days to seconds.
    printf -v request_body '{\n    "ip": "%s",\n    "description": "恶意爬虫攻击",\n    "expire_time": %d\n}' \
        "${ip}" "$((BAN_DAYS * 86400))"

    api_path="/v1.0/cdn/domains/${service_id}/access-control/ip-blacklist"
    # Sign with the same timestamp that is sent in X-Sdk-Date.
    signature=$(hws_sign "POST" "${api_path}" "" "${timestamp}")

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        response=$(curl -s --connect-timeout 10 --max-time 30 -X POST "https://cdn.${REGION}.myhuaweicloud.com${api_path}" \
            -H "Content-Type: application/json" \
            -H "X-Sdk-Date: ${timestamp}" \
            -H "Authorization: ${signature}" \
            -d "${request_body}")

        if grep -q '"code":"0"' <<< "${response}"; then
            echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN成功封禁IP ${ip}" >> "$LOG_SAVE"
            return 0
        fi

        # Every failed attempt is logged, including the retry.
        echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN封禁IP ${ip}失败:${response}" >> "$LOG_SAVE"
        if (( attempt < max_attempts )); then
            sleep 3
        fi
    done
    return 1
}

# Add one User-Agent string to the CDN User-Agent blacklist.
# Globals:   REGION, LOG_SAVE (read)
# Arguments: $1 - User-Agent string (taken from the access log, so untrusted)
# Outputs:   a result line appended to $LOG_SAVE
# Returns:   0 when the API reported success, 1 otherwise
# NOTE(review): the endpoint path, payload schema and the '"code":"0"' success
# marker should be confirmed against the Huawei Cloud CDN API reference; in
# particular verify whether this PUT replaces the existing rule list.
function add_cdn_ua_blacklist() {
    local ua=$1
    local timestamp service_id escaped request_body signature response api_path

    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    service_id=$(get_cdn_service_id)
    # Without a service ID the URL would be malformed, so do not call the API.
    if [ -z "${service_id}" ]; then
        echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN封禁UA ${ua}失败:无法获取CDN服务ID" >> "$LOG_SAVE"
        return 1
    fi

    # JSON-escape the value: backslash first, then quotes and common control
    # characters; any remaining control characters are dropped.
    escaped=${ua//\\/\\\\}
    escaped=${escaped//\"/\\\"}
    escaped=${escaped//$'\n'/\\n}
    escaped=${escaped//$'\r'/\\r}
    escaped=${escaped//$'\t'/\\t}
    escaped=${escaped//[[:cntrl:]]/}

    printf -v request_body '{\n    "type": "blacklist",\n    "rules": [\n        {\n            "value": "%s",\n            "description": "恶意爬虫UA"\n        }\n    ]\n}' \
        "${escaped}"

    api_path="/v1.0/cdn/domains/${service_id}/access-control/user-agent"
    # Sign with the same timestamp that is sent in X-Sdk-Date.
    signature=$(hws_sign "PUT" "${api_path}" "" "${timestamp}")

    response=$(curl -s --connect-timeout 10 --max-time 30 -X PUT "https://cdn.${REGION}.myhuaweicloud.com${api_path}" \
        -H "Content-Type: application/json" \
        -H "X-Sdk-Date: ${timestamp}" \
        -H "Authorization: ${signature}" \
        -d "${request_body}")

    if grep -q '"code":"0"' <<< "${response}"; then
        echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN成功封禁UA ${ua}" >> "$LOG_SAVE"
        return 0
    fi
    echo "[$(date +%Y-%m-%d\ %H:%M:%S)] CDN封禁UA ${ua}失败:${response}" >> "$LOG_SAVE"
    return 1
}

# ===================== Main program =====================
# Requires GNU awk: systime, mktime, gensub and the three-argument match used
# below are gawk extensions.

# Append one timestamped line to the run log ($LOG_SAVE).
function log_msg() {
    echo "[$(date +%Y-%m-%d\ %H:%M:%S)] $*" >> "$LOG_SAVE"
}

# Print, one per line, every client IP whose busiest single minute within the
# last hour exceeded $BAN_THRESHOLD requests.
# Returns 1 without output when $LOG_FILE is not readable.
function find_malicious_ips() {
    [ -r "$LOG_FILE" ] || return 1
    awk -v threshold="$BAN_THRESHOLD" '
    BEGIN {
        one_hour_ago = systime() - 3600
        month_map["Jan"]="01"; month_map["Feb"]="02"; month_map["Mar"]="03"
        month_map["Apr"]="04"; month_map["May"]="05"; month_map["Jun"]="06"
        month_map["Jul"]="07"; month_map["Aug"]="08"; month_map["Sep"]="09"
        month_map["Oct"]="10"; month_map["Nov"]="11"; month_map["Dec"]="12"
    }
    {
        # Rely on the return value of match: the array it fills also carries
        # element 0 and start/length entries, so its length is never 6.
        if (match($4, /\[([0-9]{2})\/([a-zA-Z]{3})\/([0-9]{4}):([0-9]{2}):([0-9]{2}):([0-9]{2})/, time_arr) == 0) next
        if (!(time_arr[2] in month_map)) next

        # Minute-resolution key: "YYYY-MM-DD HH:MM"
        log_time = time_arr[3] "-" month_map[time_arr[2]] "-" time_arr[1] " " time_arr[4] ":" time_arr[5]
        log_timestamp = mktime(gensub(/[-:]/, " ", "g", log_time) " 0")
        if (log_timestamp < one_hour_ago) next

        ip = $1
        minute_key = ip "_" log_time
        ip_count[minute_key]++
        if (ip_count[minute_key] > ip_max[ip]) {
            ip_max[ip] = ip_count[minute_key]
        }
    }
    END {
        for (ip in ip_max) {
            if (ip_max[ip] > threshold) {
                print ip
            }
        }
    }
    ' "$LOG_FILE"
}

# Print up to 10 distinct User-Agent strings (6th double-quote-delimited field
# of a log line) that contain the keyword $1, case-insensitively. Only the
# User-Agent field is inspected, so a keyword appearing in the URL or referer
# does not flag an unrelated User-Agent.
function find_malicious_uas() {
    local keyword=$1
    [ -r "$LOG_FILE" ] || return 1
    awk -F'"' -v kw="$keyword" '
        BEGIN { kw = tolower(kw) }
        index(tolower($6), kw) { print $6 }
    ' "$LOG_FILE" | sort -u | head -10
}

# Succeed when $1 is already present in MALICIOUS_UA_LIST.
function ua_already_listed() {
    local candidate=$1 existing
    for existing in "${MALICIOUS_UA_LIST[@]}"; do
        if [ "$existing" = "$candidate" ]; then
            return 0
        fi
    done
    return 1
}

# Succeed when $1 starts with a loopback or RFC 1918 private IPv4 prefix.
function is_private_ip() {
    local addr=$1
    [[ $addr =~ ^127\. || $addr =~ ^192\.168\. || $addr =~ ^10\. || $addr =~ ^172\.(1[6-9]|2[0-9]|3[0-1])\. ]]
}

# Make sure the run log exists
touch "$LOG_SAVE"
log_msg "开始执行恶意爬虫封禁脚本(含CDN同步)"

if [ ! -r "$LOG_FILE" ]; then
    log_msg "网站日志不可读:$LOG_FILE,本次跳过识别"
fi

# 1. Detect malicious IPs
log_msg "开始识别恶意IP"
MALICIOUS_IPS=$(find_malicious_ips)

# 2. Detect malicious User-Agents
log_msg "开始识别恶意User-Agent"
MALICIOUS_UA_KEYWORDS=("AhrefsBot" "SemrushBot" "MJ12bot" "DotBot" "BLEXBot" "PetalBot" "python-requests" "Scrapy" "curl" "wget")
MALICIOUS_UA_LIST=()

for keyword in "${MALICIOUS_UA_KEYWORDS[@]}"; do
    # One array element per User-Agent, so each is submitted separately.
    while IFS= read -r found_ua; do
        [ -n "$found_ua" ] || continue
        ua_already_listed "$found_ua" && continue
        MALICIOUS_UA_LIST+=("$found_ua")
        log_msg "发现恶意User-Agent:$found_ua"
    done < <(find_malicious_uas "$keyword")
done

# 3. Ban malicious IPs (BaoTa + CDN)
log_msg "开始封禁恶意IP"
BANNED_IP_COUNT=0
# Word splitting is intentional: MALICIOUS_IPS holds one IP per line.
for ip in $MALICIOUS_IPS; do
    # Skip loopback / private addresses
    if is_private_ip "$ip"; then
        continue
    fi

    # BaoTa ban, unless the IP already appears in the firewall config
    if ! grep -qF "\"$ip\"" "$BT_FIREWALL"; then
        if /www/server/panel/python /www/server/panel/scripts/firewall.py add ban "$ip" "$BAN_DAYS" "恶意爬虫攻击"; then
            log_msg "宝塔成功封禁IP $ip"
        else
            log_msg "宝塔封禁IP $ip 失败"
        fi
    fi

    # CDN ban
    add_cdn_ip_blacklist "$ip"
    BANNED_IP_COUNT=$((BANNED_IP_COUNT + 1))
done

# 4. Ban malicious User-Agents (CDN)
log_msg "开始封禁恶意User-Agent"
for ua in "${MALICIOUS_UA_LIST[@]}"; do
    add_cdn_ua_blacklist "$ua"
done

# 5. Clean up expired rules
log_msg "开始清理过期封禁规则"
# BaoTa expired rules
/www/server/panel/python /www/server/panel/scripts/firewall.py clean ban
# Nothing is sent to the CDN here; only a log line is written
log_msg "CDN过期规则由平台自动清理"

# 6. Summary (private addresses that were skipped are not counted)
log_msg "脚本执行完成,本次封禁IP数量:${BANNED_IP_COUNT},封禁UA数量:${#MALICIOUS_UA_LIST[@]}"
echo "=============================================================" >> "$LOG_SAVE"

三、关键配置说明(必须修改)

1. 获取华为云 AccessKey

  • 登录华为云控制台 → 右上角头像 → 「我的凭证」→ 「访问密钥」→ 「新建访问密钥」
  • 保存好 AccessKey ID 和 AccessKey Secret(只显示一次,务必备份)

2. 填写配置项

在脚本开头找到以下配置,替换为你的信息:

# AccessKey ID, obtained from the Huawei Cloud console
AK='你的AccessKey ID'
# AccessKey Secret, obtained from the Huawei Cloud console
SK='你的AccessKey Secret'
# Region identifier (for example cn-north-1 or cn-south-1; see the console)
REGION='cn-north-1'
# Domain accelerated by the CDN (your own domain)
CDN_DOMAIN='填写你的域名'

四、使用步骤(新增权限检查)

  1. 保存脚本:替换原有脚本,命名为 anti_crawler.sh,放在 /www/server/panel/scripts/
  2. 赋予执行权限
# Owner-only permissions: the script contains the AccessKey ID/Secret pair
chmod 700 /www/server/panel/scripts/anti_crawler.sh

3.安装依赖(宝塔默认已安装,缺失则执行):

# gawk is required as well: the script uses GNU awk extensions
# (systime, mktime, gensub, three-argument match)
yum install -y curl openssl gawk

4.手动测试

# Run the script once by hand; results are appended to the LOG_SAVE file
bash /www/server/panel/scripts/anti_crawler.sh

5. 设置定时任务:宝塔「计划任务」→ 每小时执行该脚本

五、注意事项

  1. AccessKey 权限:建议给 AccessKey 仅授予「CDN 只读 + 访问控制管理」权限,避免权限过大风险
  2. 地域参数:华为云地域参考(常用):
    • 华北 – 北京一:cn-north-1
    • 华东 – 上海二:cn-east-2
    • 华南 – 广州:cn-south-1
  3. API 调用限制:华为云 CDN API 有调用频率限制(默认 100 次 / 分钟);脚本本身未做限流,单次需封禁的 IP/UA 较多时请留意该限制
  4. 日志排查:执行异常时查看 /www/wwwlogs/anti_crawler.log,里面会记录 CDN API 调用结果

总结

  1. 升级后的脚本实现了宝塔防火墙 + 华为云 CDN双层封禁,恶意爬虫既过不了 CDN,也进不了源站;
  2. 核心配置需填写网站日志路径、华为云 AccessKey、地域和 CDN 域名,CDN 服务 ID 可留空自动获取;
  3. 脚本自带重试和日志功能,运行异常可快速定位问题。

温馨提示: 本文最后更新于2026-02-22 19:44:03,某些文章具有时效性,若有错误或已失效,请在下方留言或联系阿南吖博客
本站代码模板仅供学习交流使用请勿商业运营,严禁从事违法,侵权等任何非法活动,否则后果自负!
© 版权声明
THE END
喜欢就支持一下吧!
点赞11赞赏
评论 抢沙发

请登录后发表评论

    暂无评论内容