From 6030f022145e502ff4d93491d29e9db6d195ece1 Mon Sep 17 00:00:00 2001 From: huangtuq <277837260@qq.com> Date: Wed, 29 Dec 2021 22:58:15 +0800 Subject: [PATCH] fix deploy and initial scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新安装和部署脚本,以及服务端和节点端的初始化脚本 --- README.md | 7 +- package.sh | 3 + script/node/conf | 2 + script/node/init.sh | 20 + script/node/monitor/init.sh | 1 + .../monitor/node_exporter_deploy.sh | 15 +- script/node/pre_init.sh | 54 ++ script/node/vmcore/init.sh | 1 + script/node/vmcore/init_vmcore.sh | 35 ++ script/node/vmcore/vmcore_collect.py | 99 ++++ script/server/init.sh | 15 + script/{ => server}/monitor/Readme.txt | 0 .../{ => server}/monitor/grafana_api_set.sh | 1 + script/server/monitor/init.sh | 1 + script/{ => server}/monitor/local_copy_pkg.sh | 0 .../monitor/monitor_server_clear.sh | 2 +- .../monitor/monitor_server_deploy.sh | 35 +- .../monitor/netinfo_dashboard.json | 0 .../monitor/netinfo_dashboard_new.json | 0 .../monitor/prometheus_get_node.py | 12 +- .../{ => server}/monitor/sysom-dashboard.json | 0 script/server/monitor/test.py | 17 + script/server/vmcore/init.sh | 1 + script/server/vmcore/init_server.sh | 15 + script/server/vmcore/parse_panic.py | 500 ++++++++++++++++++ script/server/vmcore/vmcore_const.py | 25 + tools/deploy/deploy.sh | 58 +- tools/deploy/sysom.conf | 16 +- 28 files changed, 875 insertions(+), 60 deletions(-) create mode 100644 script/node/conf create mode 100644 script/node/init.sh create mode 120000 script/node/monitor/init.sh rename script/{ => node}/monitor/node_exporter_deploy.sh (67%) create mode 100755 script/node/pre_init.sh create mode 120000 script/node/vmcore/init.sh create mode 100644 script/node/vmcore/init_vmcore.sh create mode 100644 script/node/vmcore/vmcore_collect.py create mode 100644 script/server/init.sh rename script/{ => server}/monitor/Readme.txt (100%) rename script/{ => server}/monitor/grafana_api_set.sh (90%) create mode 120000 script/server/monitor/init.sh rename script/{ => server}/monitor/local_copy_pkg.sh (100%) rename script/{ => server}/monitor/monitor_server_clear.sh (93%) rename script/{ => server}/monitor/monitor_server_deploy.sh (81%) rename script/{ => server}/monitor/netinfo_dashboard.json (100%) rename script/{ => server}/monitor/netinfo_dashboard_new.json (100%) rename script/{ => server}/monitor/prometheus_get_node.py (58%) rename script/{ => server}/monitor/sysom-dashboard.json (100%) create mode 100644 script/server/monitor/test.py create mode 120000 script/server/vmcore/init.sh create mode 100644 script/server/vmcore/init_server.sh create mode 100644 script/server/vmcore/parse_panic.py create mode 100644 script/server/vmcore/vmcore_const.py diff --git a/README.md b/README.md index e596707f..2557143a 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,10 @@ ``` tar xf sysomRelease-20211207022031.tar.gz cd sysomRelease-20211207022031 - # 默认部署在 /home/sysom 目录,可以接参数自定义安装目录, - bash deploy.sh /usr/local + # 使用deploy.sh脚本部署项目,需要带三个参数, + # arg1 : 部署目录, + # arg2 : 内网IP(主要是方便内网通讯,用户需要保证内网能通) + # arg3 : 外网IP(浏览器可以访问到的IP地址) + bash deploy.sh /usr/local/sysom 192.168.100.100 100.100.22.22 ``` diff --git a/package.sh b/package.sh index 36dec827..15ea3860 100644 --- a/package.sh +++ b/package.sh @@ -4,9 +4,11 @@ green="\033[32m" RELEASE=sysomRelease-$(date +"%Y%m%d%H%M%S") APIDIR=sysom_api WEBDIR=sysom_web +SCRIPTDIR=script TOOLSDIR=tools # build web pushd sysom_web +yarn add umi yarn build popd @@ -15,6 +17,7 @@ cp -r ${APIDIR}/ ${TOOLSDIR}/ ${RELEASE}/ cp -r ${WEBDIR}/dist/ ${RELEASE}/${WEBDIR}/ mkdir -p ${RELEASE}/${WEBDIR}/download/ cp ${TOOLSDIR}/deploy/deploy.sh ${RELEASE}/ +cp -r ${SCRIPTDIR} ${RELEASE}/ tar czf ${RELEASE}.tar.gz ${RELEASE}/ rm -rf ${RELEASE} printf "$green The release pacakge is ${RELEASE}.tar.gz" diff --git a/script/node/conf b/script/node/conf new file mode 100644 index 00000000..0fa2c673 --- /dev/null +++ b/script/node/conf @@ -0,0 +1,2 @@ +APP_HOME=/usr/local/sysom +SERVER_IP=172.16.139.35 diff --git a/script/node/init.sh b/script/node/init.sh new file mode 100644 index 00000000..b1817fe1 --- /dev/null +++ b/script/node/init.sh @@ -0,0 +1,20 @@ +#!/bin/bash -x + +basedir=`dirname $0` + +cd $basedir + +for i in `cat conf` +do + export $i +done + +for dir in `ls` +do + if [ -d $dir ] + then + pushd $dir + bash -x init.sh + popd + fi +done diff --git a/script/node/monitor/init.sh b/script/node/monitor/init.sh new file mode 120000 index 00000000..8a68572a --- /dev/null +++ b/script/node/monitor/init.sh @@ -0,0 +1 @@ +node_exporter_deploy.sh \ No newline at end of file diff --git a/script/monitor/node_exporter_deploy.sh b/script/node/monitor/node_exporter_deploy.sh similarity index 67% rename from script/monitor/node_exporter_deploy.sh rename to script/node/monitor/node_exporter_deploy.sh index c9c37840..f5660b85 100755 --- a/script/monitor/node_exporter_deploy.sh +++ b/script/node/monitor/node_exporter_deploy.sh @@ -1,11 +1,13 @@ #!/bin/bash -x -RESOURCE_DIR=/usr/local/sysom/monitor +RESOURCE_DIR=${APP_HOME}/monitor NODE_EXPORTER_VER=1.2.2 NODE_EXPORTER_ARCH=linux-amd64 NODE_EXPORTER_PKG=node_exporter-${NODE_EXPORTER_VER}.${NODE_EXPORTER_ARCH} -NODE_EXPORTER_TAR=$NODE_EXPORTER_PKG.tar.gz +NODE_EXPORTER_TAR=${NODE_EXPORTER_PKG}.tar.gz + +exit 0 ##设置node_exporter开机自动启动 cat << EOF > node_exporter.service [Unit] @@ -23,14 +25,15 @@ EOF main() { - tar -zxvf $NODE_EXPORTER_TAR - rm -rf $RESOURCE_DIR/node_exporter - mv $NODE_EXPORTER_PKG $RESOURCE_DIR/node_exporter + tar -zxvf ${NODE_EXPORTER_TAR} + rm -rf ${RESOURCE_DIR}/node_exporter + mkdir -p ${RESOURCE_DIR} + mv ${NODE_EXPORTER_PKG} ${RESOURCE_DIR}/node_exporter mv node_exporter.service /usr/lib/systemd/system systemctl daemon-reload systemctl enable node_exporter systemctl start node_exporter - ps -elf | grep "/usr/local/sysom/monitor/node_exporter/node_exporter" | grep -v grep 1>/dev/null + ps -elf | grep "${RESOURCE_DIR}/node_exporter/node_exporter" | grep -v grep 1>/dev/null if [ $? -ne 0 ] then exit 1 diff --git a/script/node/pre_init.sh b/script/node/pre_init.sh new file mode 100755 index 00000000..e39c3528 --- /dev/null +++ b/script/node/pre_init.sh @@ -0,0 +1,54 @@ +#!/bin/bash -x + +UPLOAD_DIR=${APP_HOME}/target/sysom_web/download/ +APP_CMD_CONF=${APP_HOME}/target/sysom_api/conf/__init__.py +RESOURCE_DIR=${APP_HOME}/monitor +PROMETHEUS_ARCH=linux-amd64 +NODE_EXPORTER_VER=1.2.2 +NODE_EXPORTER_PKG=node_exporter-${NODE_EXPORTER_VER}.${PROMETHEUS_ARCH} +NODE_EXPORTER_TAR=$NODE_EXPORTER_PKG.tar.gz +NODE_INIT_DIR=sysom_node_init +NODE_INIT_PKG=sysom_node_init.tar.gz + +BASE_DIR=`dirname $0` + +init_monitor() +{ + cp ${RESOURCE_DIR}/${NODE_EXPORTER_TAR} monitor/ +} + +prepare_init_tar() +{ + rm -f conf + echo "APP_HOME=${APP_HOME}" >> conf + echo "SERVER_IP=${SERVER_IP}" >> conf + mkdir -p ../${NODE_INIT_DIR} + cp -r * ../${NODE_INIT_DIR} + rm -f ../${NODE_INIT_DIR}/pre_init.sh + tar -zvcf ../${NODE_INIT_PKG} ../${NODE_INIT_DIR} + rm -rf ../${NODE_INIT_DIR} + mv ../${NODE_INIT_PKG} ${UPLOAD_DIR} +} + +set_node_init_cmd() +{ + line_num=`cat -n $APP_CMD_CONF | grep CLIENT_DEPLOY_CMD | awk '{print $1}'` + sed -i "s/CLIENT_DEPLOY_CMD/#CLIENT_DEPLOY_CMD/g" $APP_CMD_CONF + sed -i "$line_num a \ \ \ \ CLIENT_DEPLOY_CMD = \'mkdir -p /tmp/sysom;cd /tmp/sysom;wget http://${SERVER_IP}/download/${NODE_INIT_PKG};tar -xf ${NODE_INIT_PKG};bash -x ${NODE_INIT_DIR}/init.sh\'" $APP_CMD_CONF +} + +pre_init() +{ + pushd ${BASE_DIR} + init_monitor + prepare_init_tar + set_node_init_cmd + popd +} + +main() +{ + pre_init +} + +main diff --git a/script/node/vmcore/init.sh b/script/node/vmcore/init.sh new file mode 120000 index 00000000..6223fe60 --- /dev/null +++ b/script/node/vmcore/init.sh @@ -0,0 +1 @@ +init_vmcore.sh \ No newline at end of file diff --git a/script/node/vmcore/init_vmcore.sh b/script/node/vmcore/init_vmcore.sh new file mode 100644 index 00000000..08115058 --- /dev/null +++ b/script/node/vmcore/init_vmcore.sh @@ -0,0 +1,35 @@ +#! /bin/sh +yum install nfs-utils rpcbind -y +systemctl start rpcbind && systemctl enable rpcbind +systemctl start nfs && systemctl enable nfs + +VMCORE_HOME=${APP_HOME}/vmcore +VMCORE_NFS_HOME=${APP_HOME}/vmcore/vmcore-nfs +mkdir -p ${VMCORE_HOME} + +service=" +[Unit] +Description=Collect vmcore file to oss +After=network.target network-online.target remote-fs.target basic.target +DefaultDependencies=no + +[Service] +Type=forking +ExecStart=/usr/bin/python3 ${VMCORE_HOME}/vmcore_collect.py ${SERVER_IP} ${VMCORE_NFS_HOME} +StartLimitInterval=0 +StandardOutput=syslog +StandardError=inherit + +[Install] +WantedBy=multi-user.target +" + +cat << EOF > vmcore-collect.service +$service +EOF + +cp vmcore_collect.py ${VMCORE_HOME} +mv vmcore-collect.service /usr/lib/systemd/system/vmcore-collect.service +chmod 644 /usr/lib/systemd/system/vmcore-collect.service +systemctl daemon-reload +systemctl enable vmcore-collect.service diff --git a/script/node/vmcore/vmcore_collect.py b/script/node/vmcore/vmcore_collect.py new file mode 100644 index 00000000..484492b4 --- /dev/null +++ b/script/node/vmcore/vmcore_collect.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import os +import sys +from datetime import datetime +import json +import traceback +import socket + +nfs_ip = '127.0.0.1' +nfs_dir = '/usr/vmcore-nfs' +if len(sys.argv) == 3 : + nfs_ip = sys.argv[1] + nfs_dir = sys.argv[2] + +def get_crash_path(): + try: + if os.path.exists('/etc/kdump.conf'): + with open('/etc/kdump.conf', 'r') as f1: + lines = f1.readlines() + part = '' + var_path = '' + for line in lines: + if line.startswith('ext4'): + if len(line.split()) > 1: + part0 = line.split()[1] + else: + continue + if part0.startswith('/dev/'): + cmd = 'lsblk %s' % (part0) + output = os.popen(cmd) + ret = output.read().strip() + output.close() + part = ret.splitlines()[-1].split()[-1] + elif part0.startswith('LABEL='): + part = part0.split('=')[-1] + elif line.startswith('path'): + var_path = line.split()[-1] + if len(part) > 0 and len(var_path) > 0: + return "%s%s" % (part, var_path) + elif len(var_path) > 0: + return var_path + else: + return '/var/crash/' + except: + pass + return '/var/crash/' + +def upload_nfs(vmcore_dir): + try: + hostname=socket.gethostname() + ip=socket.gethostbyname(hostname) + timelist = vmcore_dir.split('-')[1:] + core_time = ''.join(timelist) + core_time = core_time.replace(':','') + vmcore_name ='%s_%s'%(core_time,ip) + cmd = 'mkdir -p /tmp/vmcore-nfs' + ret = os.system(cmd) + cmd = 'mount -t nfs %s:%s /tmp/vmcore-nfs' % (nfs_ip,nfs_dir) + ret = os.system(cmd) + cmd = 'mkdir /tmp/vmcore-nfs/%s' % vmcore_name + ret = os.system(cmd) + cmd = 'cp %s/vmcore-dmesg.txt /tmp/vmcore-nfs/%s/vmcore-dmesg.txt' % (vmcore_dir,vmcore_name) + ret = os.system(cmd) + if ret != 0: + print('faile to copy to nfs %s' % vmcore_dir) + cmd = 'cp %s/vmcore /tmp/vmcore-nfs/%s/vmcore' % (vmcore_dir,vmcore_name) + ret = os.system(cmd) + if ret != 0: + print('faile to copy to nfs %s' % vmcore_dir) + cmd = 'umount /tmp/vmcore-nfs' + ret = os.system(cmd) + with open('%s/.upload' % vmcore_dir,'w') as f: + pass + + except: + import traceback + traceback.print_exc() + +def main(): + crash_path = get_crash_path() + dirs_list = [] + files = os.listdir(crash_path) + files_path = [f'{crash_path}/{file}' for file in files] + for file in files_path: + if os.path.isfile(file): + continue + if file.find('-') < 0: + continue + dirs_list.append(file) + dirs_list.sort(key=lambda fp: os.path.getmtime(fp),reverse=True) + for dir in dirs_list: + tmp = '%s/.upload' % dir + if os.path.exists(tmp): + break + upload_nfs(dir) + +if __name__=="__main__": + main() + diff --git a/script/server/init.sh b/script/server/init.sh new file mode 100644 index 00000000..1e768856 --- /dev/null +++ b/script/server/init.sh @@ -0,0 +1,15 @@ +#!/bin/bash -x + +basedir=`dirname $0` + +cd $basedir + +for dir in `ls` +do + if [ -d $dir ] + then + pushd $dir + bash -x init.sh + popd + fi +done diff --git a/script/monitor/Readme.txt b/script/server/monitor/Readme.txt similarity index 100% rename from script/monitor/Readme.txt rename to script/server/monitor/Readme.txt diff --git a/script/monitor/grafana_api_set.sh b/script/server/monitor/grafana_api_set.sh similarity index 90% rename from script/monitor/grafana_api_set.sh rename to script/server/monitor/grafana_api_set.sh index fdb69372..22d17b93 100755 --- a/script/monitor/grafana_api_set.sh +++ b/script/server/monitor/grafana_api_set.sh @@ -6,6 +6,7 @@ GAFANA_CONFIG=/etc/grafana/grafana.ini sed 's/;allow_embedding = false/allow_embedding = true/g' -i $GAFANA_CONFIG sed 's/;disable_login_form = false/disable_login_form = true/g' -i $GAFANA_CONFIG sed '/enable anonymous access/{n;s/;enabled = false/enabled = true/;}' -i $GAFANA_CONFIG +sed 's/;root_url = %(protocol)s:\/\/%(domain)s:%(http_port)s\//root_url = %(protocol)s:\/\/%(domain)s\/grafana\//g' -i $GAFANA_CONFIG ##login grafana, and get cookie diff --git a/script/server/monitor/init.sh b/script/server/monitor/init.sh new file mode 120000 index 00000000..3a90bc95 --- /dev/null +++ b/script/server/monitor/init.sh @@ -0,0 +1 @@ +monitor_server_deploy.sh \ No newline at end of file diff --git a/script/monitor/local_copy_pkg.sh b/script/server/monitor/local_copy_pkg.sh similarity index 100% rename from script/monitor/local_copy_pkg.sh rename to script/server/monitor/local_copy_pkg.sh diff --git a/script/monitor/monitor_server_clear.sh b/script/server/monitor/monitor_server_clear.sh similarity index 93% rename from script/monitor/monitor_server_clear.sh rename to script/server/monitor/monitor_server_clear.sh index b60482f6..ced882d6 100755 --- a/script/monitor/monitor_server_clear.sh +++ b/script/server/monitor/monitor_server_clear.sh @@ -1,6 +1,6 @@ #!/bin/bash -x -RESOURCE_DIR=/usr/local/sysom/monitor +RESOURCE_DIR=$1/monitor disable_prometheus() { diff --git a/script/monitor/monitor_server_deploy.sh b/script/server/monitor/monitor_server_deploy.sh similarity index 81% rename from script/monitor/monitor_server_deploy.sh rename to script/server/monitor/monitor_server_deploy.sh index bb69d904..9bc65fd5 100755 --- a/script/monitor/monitor_server_deploy.sh +++ b/script/server/monitor/monitor_server_deploy.sh @@ -1,8 +1,8 @@ #!/bin/bash -x -UPLOAD_DIR=/usr/local/sysom/target/sysom_web/download/ +UPLOAD_DIR=${APP_HOME}/target/sysom_web/download/ +RESOURCE_DIR=${APP_HOME}/monitor GRAFANA_PKG=grafana-8.2.5-1.x86_64.rpm -RESOURCE_DIR=/usr/local/sysom/monitor PROMETHEUS_VER=2.29.1 PROMETHEUS_ARCH=linux-amd64 PROMETHEUS_PKG=prometheus-${PROMETHEUS_VER}.${PROMETHEUS_ARCH} @@ -55,20 +55,19 @@ install_grafana() ##configure prometheus.yml to auto discovery new nodes add_auto_discovery() { - _dir=${PWD} - pushd $RESOURCE_DIR/prometheus + pushd ${RESOURCE_DIR}/prometheus mkdir -p node cat << EOF >> prometheus.yml - job_name: 'auto_discovery' file_sd_configs: - files: - - "/usr/local/sysom/monitor/prometheus/node/node.json" + - "${RESOURCE_DIR}/prometheus/node/node.json" refresh_interval: 10s EOF popd - cp prometheus_get_node.py $RESOURCE_DIR/prometheus/ + cp prometheus_get_node.py ${RESOURCE_DIR}/prometheus/ } start_prometheus_service() @@ -117,8 +116,8 @@ install_prometheus() popd } -##download node_exporter pkg and upload to sysom dir -upload_node_exporter() +##download node_exporter pkg +download_node_exporter() { echo "install node_exporter......" pushd $RESOURCE_DIR @@ -130,12 +129,8 @@ upload_node_exporter() echo "wget node_exporter" wget https://github.com/prometheus/node_exporter/releases/download/v$NODE_EXPORTER_VER/$NODE_EXPORTER_TAR fi -# tar -zxvf $NODE_EXPORTER_TAR - -# mv $NODE_EXPORTER_PKG node_exporter - cp $NODE_EXPORTER_TAR $UPLOAD_DIR popd - cp node_exporter_deploy.sh $UPLOAD_DIR + } configure_grafana() @@ -145,12 +140,12 @@ configure_grafana() configure_cron() { - echo "* * * * * python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root - echo "* * * * * sleep 10;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root - echo "* * * * * sleep 20;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root - echo "* * * * * sleep 30;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root - echo "* * * * * sleep 40;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root - echo "* * * * * sleep 50;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" >> /var/spool/cron/root + echo "* * * * * python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root + echo "* * * * * sleep 10;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root + echo "* * * * * sleep 20;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root + echo "* * * * * sleep 30;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root + echo "* * * * * sleep 40;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root + echo "* * * * * sleep 50;python3 $RESOURCE_DIR/prometheus/prometheus_get_node.py" ${APP_HOME} >> /var/spool/cron/root } main() @@ -161,7 +156,7 @@ main() # bash -x local_copy_pkg.sh install_grafana install_prometheus - upload_node_exporter + download_node_exporter start_grafana_service start_prometheus_service diff --git a/script/monitor/netinfo_dashboard.json b/script/server/monitor/netinfo_dashboard.json similarity index 100% rename from script/monitor/netinfo_dashboard.json rename to script/server/monitor/netinfo_dashboard.json diff --git a/script/monitor/netinfo_dashboard_new.json b/script/server/monitor/netinfo_dashboard_new.json similarity index 100% rename from script/monitor/netinfo_dashboard_new.json rename to script/server/monitor/netinfo_dashboard_new.json diff --git a/script/monitor/prometheus_get_node.py b/script/server/monitor/prometheus_get_node.py similarity index 58% rename from script/monitor/prometheus_get_node.py rename to script/server/monitor/prometheus_get_node.py index d418a274..24a2f09c 100755 --- a/script/monitor/prometheus_get_node.py +++ b/script/server/monitor/prometheus_get_node.py @@ -1,20 +1,26 @@ #!/usr/bin/python3 import requests import json +import sys + +if len(sys.argv) < 2: + fname="monitor/prometheus/node/node.json" +else: + fname=sys.argv[1]+"/monitor/prometheus/node/node.json" -fname="/usr/local/sysom/monitor/prometheus/node/node.json" host_api="http://localhost/api/v1/host" hostlist = requests.get(host_api) res = hostlist.content +print(res) hosts = json.loads(res) -host_len = len(hosts["data"]["results"]) +host_len = len(hosts["data"]) iplist=[] fo = open(fname,"w") for i in range(0,host_len): - iplist.append(hosts["data"]["results"][i]["ip"]+":9100") + iplist.append(hosts["data"][i]["ip"]+":9100") target={"targets":iplist} res="["+json.dumps(target)+"]" diff --git a/script/monitor/sysom-dashboard.json b/script/server/monitor/sysom-dashboard.json similarity index 100% rename from script/monitor/sysom-dashboard.json rename to script/server/monitor/sysom-dashboard.json diff --git a/script/server/monitor/test.py b/script/server/monitor/test.py new file mode 100644 index 00000000..27f960ad --- /dev/null +++ b/script/server/monitor/test.py @@ -0,0 +1,17 @@ +# test_命令行传参.py +import sys +def para_input(): + print(len(sys.argv)) # 参数序列的长度,此时所有参数存放在一个list之中 + if len(sys.argv) < 2: + sys.exit("python error") + script_name = sys.argv[0] #第一个参数指的是脚本名称 + param_first = sys.argv[1] #第二个参数,此时也是一个str列表 + param_second = sys.argv[2] #第三个参数 + + print(script_name) + print(param_first) + print(param_second) + print(type(param_second)) + print(script_name, param_first, param_second) +if __name__ == "__main__": + para_input() diff --git a/script/server/vmcore/init.sh b/script/server/vmcore/init.sh new file mode 120000 index 00000000..76eb4bdd --- /dev/null +++ b/script/server/vmcore/init.sh @@ -0,0 +1 @@ +init_server.sh \ No newline at end of file diff --git a/script/server/vmcore/init_server.sh b/script/server/vmcore/init_server.sh new file mode 100644 index 00000000..fa933c7c --- /dev/null +++ b/script/server/vmcore/init_server.sh @@ -0,0 +1,15 @@ +#! /bin/sh +yum install nfs-utils rpcbind -y +systemctl start rpcbind && systemctl enable rpcbind +systemctl start nfs && systemctl enable nfs + +internal_net_seg=`echo ${SERVER_IP} | awk -F"." '{print $1"."$2"."$3}'` +file_path=${APP_HOME}/vmcore/vmcore-nfs +mkdir -p ${file_path} +echo "${file_path} ${internal_net_seg}.0/24(rw,async)" >> /etc/exports +exportfs -rv +chmod -R 777 ${file_path} + +cp parse_panic.py ${APP_HOME}/vmcore +cp vmcore_const.py ${APP_HOME}/vmcore +echo "* * * * * pushd ${APP_HOME}/vmcore;python3 parse_panic.py ${file_path};popd" >> /var/spool/cron/root diff --git a/script/server/vmcore/parse_panic.py b/script/server/vmcore/parse_panic.py new file mode 100644 index 00000000..9d57c3a1 --- /dev/null +++ b/script/server/vmcore/parse_panic.py @@ -0,0 +1,500 @@ +# -*- coding: utf-8 -*- +# @Author: lichen/zhilan + +import os +import sys +import time +import subprocess +import re +import sqlite3 +import json +import traceback +import importlib +import argparse +import requests +import vmcore_const +import time +from datetime import datetime +import threading +import queue +queue = queue.Queue() + +if sys.version[0] == '2': + reload(sys) + sys.setdefaultencoding('utf8') + +# crashkey_type={ +# 0:func_name +# 1:calltrace +# 2:crashkey +# 3:bugon_file +#} +nfs_root = '/usr/vmcore-nfs' +root_url = 'http://127.0.0.1:7001' +ltime_pattern = re.compile(r'^\[\s*([0-9]+)\..*\]') +rip_pattern = re.compile(r'\[\s*\S+\] RIP: 0010:.*\[<([0-9a-f]+)>\] (.+)') +rip_pattern_1 = re.compile(r'\[\s*\S+\] RIP: 0010:(\S+)') +rip_pattern_2 = re.compile(r'\[\s*\S+\] RIP .*\[<([0-9a-f]+)>\] (.+)') +ripmod_pattern = re.compile(r'\[\s*\S+\] RIP.* \[(\S+)\]$') +bugat_pattern = re.compile(r'.+\] kernel BUG at (\S+)!') +ver_pattern = re.compile(r'Comm: (\S*).*(Tainted:|Not tainted).* (\S+) #') +unload_pattern = re.compile(r'\[last unloaded: (\S+)\]') +title_pattern = re.compile(r'\[\s*\S+\] ((BUG:|Kernel panic|Bad pagetable:|divide error:|kernel BUG at|general protection fault:) .+)') +vertype_pattern = re.compile(r'(\d+)\.(\d+)\.') +last_strhost = '' + +ignore_funcs = ["schedule","schedule_timeout","ret_from_fork","kthread", + "do_syscall_64","entry_SYSCALL_64_after_swapgs","system_call_fastpath","fastpath", + "entry_SYSCALL_64_after_hwframe", + "page_fault","do_page_fault","_do_page_fault","worker_thread", + "start_secondary","cpu_startup_entry","arch_cpu_idle","default_idle", + "do_IRQ","common_interrupt","irq_exit","do_softirq", + "__schedule","io_schedule_timeout","io_schedule","dump_stack", + "exit_to_usermode_loop","stub_clone","schedule_preempt_disabled","oom_kill_process", + "unwind_backtrace","dump_header","show_stack","dump_backtrace","panic","watchdog_timer_fn", + "nmi_panic","watchdog_overflow_callback","__perf_event_overflow","perf_event_overflow","intel_pmu_handle_irq", + "perf_event_nmi_handler","nmi_handle","do_nmi","end_repeat_nmi","watchdog", + "__hrtimer_run_queues","hrtimer_interrupt","local_apic_timer_interrupt","smp_apic_timer_interrupt","apic_timer_interrupt", + "__pv_queued_spin_lock_slowpath","queued_spin_lock_slowpath" +] + +def get_column_value(column, line): + match = rip_pattern.match(line) + if match is None: + match = rip_pattern_2.match(line) + + if column['func_name']=='NA' and match: + column['rip']=match.group(1) + column['func_name']=match.group(2).split('+')[0] + column['func_name']=column['func_name'].split('.')[0] + ripmod_match = ripmod_pattern.match(line.strip()) + if ripmod_match: + column['ripmod']=ripmod_match.group(3) + match = rip_pattern_1.match(line) + if column['func_name']=='NA' and column.get('func_name_1','') =='NA' and match: + column['func_name_1']=match.group(1).split('+')[0] + column['func_name_1']=column['func_name_1'].split('.')[0] + + match = bugat_pattern.match(line) + if match: + column['bugat'] = match.group(1) + idx = line.find('Comm:') + if idx > 0: + match = ver_pattern.match(line, idx) + if match: + column['comm'] = match.group(1) + column['ver'] = match.group(3) + idx = line.find('[last unloaded:') + if idx > 0: + match = unload_pattern.match(line, idx) + if match: + column['unload'] = match.group(1) + match = title_pattern.match(line) + if match and column['title'] == 'NA': + column['title'] = match.group(1) + if column['func_name'] != 'NA': + column['tmp_func_name'] = column['func_name'] + column['tmp_rip'] = column['rip'] + column['tmp_ripmod'] = column['ripmod'] + column['func_name'] = '' + column['rip'] = '' + column['ripmod'] = '' + +def get_stamp(line): + match = ltime_pattern.match(line) + if match: + return int(match.group(1)) + return 0 + +def get_last_time(f): + ret = 10 + try: + f.seek(-512, os.SEEK_END) + except: + pass + for line in f.readlines(): + ret = get_stamp(line) + if ret > 0: + break + f.seek(0, os.SEEK_SET) + return ret-10 + +def fix_func_name(column): + if column['dmesg'].find('SysRq : Trigger a crash') > 0: + column['func_name'] = 'sysrq_handle_crash' + column['title'] = 'sysrq: SysRq : Trigger a crash' + column['status'] = vmcore_const.STATUS_SYSRQ + column['crashkey_type'] = 2 + column['crashkey'] = 'sysrq_handle_crash' + if column['dmesg'].find('Kernel panic - not syncing: Fatal machine check') > 0: + column['func_name'] = 'fatal_machine_check' + column['title'] = 'Kernel panic - not syncing: Fatal machine check' + column['status'] = vmcore_const.STATUS_HWERROR + column['crashkey_type'] = 2 + column['crashkey'] = 'fatal_machine_check' + column['panic_class'] = 'HardwareError' + if column['dmesg'].find('Kernel panic - not syncing: Fatal hardware error') > 0: + column['func_name'] = 'fatal_hardware_error' + column['title'] = 'Kernel panic - not syncing: Fatal machine check' + column['status'] = vmcore_const.STATUS_HWERROR + column['crashkey_type'] = 2 + column['crashkey'] = 'fatal_hardware_error' + column['panic_class'] = 'HardwareError' + if column['dmesg'].find('Fatal local machine check') > 0: + column['func_name'] = 'fatal_machine_check' + column['title'] = 'Kernel panic - not syncing: Fatal local machine check' + column['status'] = vmcore_const.STATUS_HWERROR + column['crashkey_type'] = 2 + column['crashkey'] = 'fatal_machine_check' + column['panic_class'] = 'HardwareError' + if 'bugat' in column: + column['bugon_file'] = column['bugat'].split(':')[0] + column['crashkey_type'] = 3 + +def parse_file(name, column): + f = open(name, 'r') + result = '' + for line in f.readlines(): + if line.find('Modules linked in') >= 0: + column['modules'] = line[line.find(':')+1:] + if len(column['modules']) >= 512: + column['modules'] = column['modules'][:-512] + result += line + get_column_value(column, line) + f.close() + column['dmesg'] = result + column['dmesg_file'] = name + if 'tmp_func_name' in column and column['func_name'] == 'NA' and column['tmp_func_name'] != 'NA': + column['func_name'] = column['tmp_func_name'] + column['rip'] = column['tmp_rip'] + column['ripmod'] = column['ripmod'] + fix_func_name(column) + if column['ripmod'] != 'NA': + if column['ripmod'] not in vmcore_const.BASEMODS: + column['panic_class'] = 'Module(%s)'%(column['ripmod']) + +line_pattern = re.compile(r'.+[0-9]+\].+\[.*\][? ]* (\S+)\+0x(\S+)/0x(\S+)') +line_pattern_1 = re.compile(r'.+[0-9]+\][? ]*(\S+)\+0x(\S+)/0x(\S+)') +def get_calltrace(column): + meettitle = 0 + list1 = [] + lines = column['dmesg'].split('\n') + modname = [] + tmplist = [] + workqueue = '' + nocalltrace = True + hung_flag = False + if column['title'].find('unrecovered softlockup') >= 0: + hung_flag = True + + invalidrip = False + if (column['rip'] == 'NA'and column['func_name'] == 'NA') or column['func_name'].startswith('0x'): + invalidrip = True + + badrip = False + if column['dmesg'].find('Code: Bad RIP value.') >= 0: + badrip = True + + question_continue = True + question_count = 0 + + for r in lines: + if r.find(column['title']) >= 0: + nocalltrace = True + meettitle = 1 + tmplist.extend(list1) + del list1[:] + question_count = 0 + question_continue = True + continue + + if r.find('Workqueue: events ') >= 0: + idx = r.find('Workqueue: events ') + workqueue = r[idx+18:] + + if r.find('EFLAGS: ') >= 0: + idx = r.find('EFLAGS: ') + eflags = r[idx+8:] + #print 'eflags',eflags + try: + eflags = int(eflags,16) + if (eflags >> 9) % 2 == 0: + badrip = True + except: + pass + if r.find("<>") >= 0: + if column['func_name'] == 'NA': + tmpline = lines[lines.index(r)-1] + m = line_pattern.match(tmpline) + if m: + column['func_name'] = m.group(1) + else: + m = line_pattern_1.match(tmpline) + if m: + column['func_name'] = m.group(1) + + if r.find('') >= 0: + badrip = True + + if hung_flag and r.find('') >= 0: + try: + if r.find('> ') >= 0 and r.find(' <') >= 0: + idx = r.find(' <') + idx2 = r.rfind('> ',0) + r = r[0:idx] + r[idx2+1:] + except: + import traceback + traceback.print_exc() + del list1[:] + question_count = 0 + question_continue = True + + if r.find("Call Trace:") > 0 or r.find("<>") > 0 or r.find("") > 0 or r.find("") >= 0: + try: + if r.find('> ') >= 0 and r.find(' <') >= 0: + idx = r.find(' <') + idx2 = r.rfind('> ',0) + r = r[0:idx] + r[idx2+1:] + except: + import traceback + traceback.print_exc() + del list1[:] + question_count = 0 + question_continue = True + modname = [] + + if r.find('?') >= 0: + if workqueue != '' and r.find(workqueue) >= 0: + list1.append(workqueue) + #print r + #print invalidrip,badrip,question_continue + if invalidrip and badrip and question_continue: + m2 = line_pattern.match(r) + if m2: + #print m2.group(1),m2.group(2),m2.group(3) + if m2.group(1).split('.')[0] == column['func_name'] or m2.group(1) in ignore_funcs: + continue + nocalltrace = False + if m2.group(2) != m2.group(3): + tmp = m2.group(1) + tmp = tmp.split('.')[0] + list1.append(tmp) + #print 'append: ',m2.group(1) + #print list1 + question_count += 1 + else: + m2 = line_pattern_1.match(r) + if m2: + #print m2.group(1),m2.group(2),m2.group(3) + if m2.group(1).split('.')[0] == column['func_name'] or m2.group(1) in ignore_funcs: + continue + nocalltrace = False + if m2.group(2) != m2.group(3): + tmp = m2.group(1) + tmp = tmp.split('.')[0] + list1.append(tmp) + #print 'append: ',m2.group(1) + #print list1 + question_count += 1 + continue + if question_count > 0: + question_continue = False + + m = line_pattern.match(r) + if m: + nocalltrace = False + if m.group(1).split('.')[0] == column['func_name'] or m.group(1) in ignore_funcs: + continue + if m.group(1) == 'panic': + del list1[:] + question_count = 0 + question_continue = True + modname = [] + continue + if len(list1) == 0 and m.group(1) in ignore_funcs: + continue + if len(modname) < 2: + modname.append(r.strip()) + tmp = m.group(1) + tmp = tmp.split('.')[0] + list1.append(tmp) + #print 'append: ',m.group(1) + #print list1 + else: + m = line_pattern_1.match(r) + if m: + nocalltrace = False + if m.group(1).split('.')[0] == column['func_name'] or m.group(1) in ignore_funcs: + continue + if m.group(1) == 'panic': + del list1[:] + question_count = 0 + question_continue = True + modname = [] + continue + if len(list1) == 0 and m.group(1) in ignore_funcs: + continue + if len(modname) < 2: + modname.append(r.strip()) + tmp = m.group(1) + tmp = tmp.split('.')[0] + list1.append(tmp) + #print 'append: ',m.group(1) + #print list1 + else: + if len(list1) > 0 and meettitle == 1: + break + if len(list1) == 0 and nocalltrace: + list1 = tmplist + + if column['func_name'] == 'NA' and len(list1) > 0: + column['func_name'] = list1[0] + del list1[0] + + calltrace = column['func_name'] + if calltrace != '': + calltrace = calltrace.split('+')[0] + if len(list1) > 2: + list1 = list1[0:2] + for i in list1: + calltrace = ''.join([calltrace,'$',i]) + column['calltrace'] = calltrace + + +def clarify_panic_type(column): + column['panic_type'] = 0 + if column['title'].find('divide error') >= 0: + column['panic_type'] = vmcore_const.PANIC_DIVIDEERROR + elif column['bugon_file'] != 'NA': + column['panic_type'] = vmcore_const.PANIC_BUGON + elif column['title'].find('NULL pointer dereference') >= 0: + column['panic_type'] = vmcore_const.PANIC_NULLPOINTER + elif column['title'].find('Kernel stack is corrupted') >= 0: + column['panic_type'] = vmcore_const.PANIC_STACKCORRUPTION + elif column['title'].find('hard LOCKUP') >= 0: + column['panic_type'] = vmcore_const.PANIC_HARDLOCKUP + elif column['title'].find('hung_task') >= 0: + column['panic_type'] = vmcore_const.PANIC_HUNGTASK + elif column['title'].find('RCU Stall') >= 0: + column['panic_type'] = vmcore_const.PANIC_RCUSTALL + elif (column['title'].find('soft lockup') >= 0 or column['title'].find('softlockup') >= 0): + column['panic_type'] = vmcore_const.PANIC_SOFTLOCKUP + +def check_panic(column): + if 'rawdmesg' not in column and os.path.isfile(column['dmesg_file']) == False: + return False + + matched = False + if 'rawdmesg' in column: + parse_rawdmesg(column) + else: + parse_file(column['dmesg_file'], column) + + m = vertype_pattern.match(column['ver']) + if m: + column['vertype'] = int(m.group(1)) * 100 + int(m.group(2)) + + get_calltrace(column) + if column['calltrace'] == 'NA': + column['crashkey_type'] = 0 + if column['crashkey_type'] == 0 and column['func_name'] != 'NA': + column['crashkey'] = '%d$%s'%(column['vertype'],column['func_name']) + elif column['crashkey_type'] == 1 and column['calltrace'] != 'NA': + column['crashkey'] = '%d$%s'%(column['vertype'],column['calltrace']) + elif column['crashkey_type'] == 2 and column['crashkey'] != 'NA': + column['crashkey'] = '%d$%s'%(column['vertype'],column['crashkey']) + elif column['crashkey_type'] == 3 and column['bugon_file'] != 'NA': + column['crashkey'] = '%d$%s$%s'%(column['vertype'],column['bugon_file'],column['calltrace']) + + clarify_panic_type(column) + #return False + + ip={'ip':column['ip']} + host_url = root_url+"/api/v1/host/" + res = requests.get(host_url,params=ip) + if res.status_code != 200 or res.text == '[]': + print("查询主机名失败") + return False + + column['hostname'] = res.json()['data'][0]['hostname'] + vmcore_url = root_url+"/api/v1/vmcore/" + data = json.dumps(column) + headers = {'content-type': 'application/json'} + res = requests.post(url=vmcore_url, data=data, headers=headers) + print(res.json()) + if res.status_code == 200: + print(f"add {column['name']} to db") + return True + else: + print("插入失败") + return False + +def do_cmd(cmd): + output = os.popen(cmd) + ret = output.read().strip() + output.close() + return ret + +def init_column(column): + column['upload_time'] = int(time.time()) + column['vmcore_file'] = 'NA' + column['dmesg_file'] = 'NA' + column['rip'] = 'NA' + column['ripmod'] = 'NA' + column['comm'] = 'NA' + column['ver'] = 'NA' + column['vertype'] = 0 + column['func_name'] = 'NA' + column['title'] = 'NA' + column['status'] = 0 + column['calltrace'] = 'NA' + column['bugon_file'] = '' + column['crashkey_type'] = 1 + column['crashkey'] = 'NA' + column['modules'] = 'NA' + column['panic_type'] = 0 + column['panic_class'] = 'BaseKernel' + column['issue_id'] = 0 + +def parse_new_crash(crash_dir): + try: + column = {} + column['name'] = crash_dir.split('/')[-1] + core_time = column['name'].split('_')[0] + core_time = datetime.strptime(core_time, "%Y%m%d%H%M%S") + column['core_time'] = core_time.strftime("%Y-%m-%d %H:%M:%S") + column['ip'] = column['name'].split('_')[1] + column['hostname'] = column['name'].split('_')[1] + init_column(column) + column['dmesg_file'] = '%s/vmcore-dmesg.txt' % crash_dir + ret = check_panic(column) + if ret: + with open('%s/.upload'%crash_dir,'w'): + pass + except: + import traceback + traceback.print_exc() + +def main(): + global nfs_root + if len(sys.argv) > 1: + nfs_root = sys.argv[1] + dirs_list = [] + #while True: + files = os.listdir(nfs_root) + files_path = [f'{nfs_root}/{file}' for file in files] + for file in files_path: + if os.path.isfile(file): + continue + dirs_list.append(file) + dirs_list.sort(key=lambda fp: os.path.getmtime(fp),reverse=True) + for dir in dirs_list: + tmp = '%s/.upload' % dir + if os.path.exists(tmp): + break + parse_new_crash(dir) + #time.sleep(20) + + +if __name__ == "__main__": + main() diff --git a/script/server/vmcore/vmcore_const.py b/script/server/vmcore/vmcore_const.py new file mode 100644 index 00000000..8f9ac84a --- /dev/null +++ b/script/server/vmcore/vmcore_const.py @@ -0,0 +1,25 @@ +STATUS_HWERROR=5 +STATUS_SYSRQ=6 +# panic type +PANIC_UAF=1 +PANIC_DOUBLEFREE=2 +PANIC_OOBREAD=3 +PANIC_OOBWRITE=4 +PANIC_NULLPOINTER=5 +PANIC_UNINITVAR=6 +PANIC_STACKCORRUPTION=7 +PANIC_INVALIDIPPTR=8 +PANIC_INVALIDDATAPTR=9 +PANIC_BUGON=10 +PANIC_DIVIDEERROR=11 +PANIC_HARDLOCKUP=12 +PANIC_SOFTLOCKUP=13 +PANIC_HUNGTASK=14 +PANIC_RCUSTALL=15 + +BASEMODS = ['ext4','jbd2','overlay','libata','libiscsi','bridge','nf_conntrack','nf_conntrack_ipv4', + 'nf_nat','nf_nat_ipv4','iptable_nat','tun','binfmt_misc','xt_CHECKSUM','iptable_mangle', + 'nf_defrag_ipv4','xt_conntrack','ipt_REJECT','nf_reject_ipv4','stp','llc','ebtable_filter', + 'ebtables','ip6_tables','iptable_filter','iscsi_tcp','libiscsi_tcp','scsi_transport_iscsi', + 'bonding','dm_mod','sg','ip_tables','mbcache','sd_mod','mpt3sas','raid_class','scsi_transport_sas', + 'ahci','libahci','btrfs','zram','numa_balancer'] diff --git a/tools/deploy/deploy.sh b/tools/deploy/deploy.sh index 95f512ba..88d6a230 100755 --- a/tools/deploy/deploy.sh +++ b/tools/deploy/deploy.sh @@ -8,14 +8,24 @@ #***************************************************************# ALIYUN_MIRROR="https://mirrors.aliyun.com/pypi/simple/" -APP_HOME="/home/sysom" APP_NAME="sysom" API_DIR="sysom_api" WEB_DIR="sysom_web" +SCRIPT_DIR="script" + +if [ $# != 3 ] ; then + echo "USAGE: $0 INSTALL_DIR Internal_IP EXTERNAL_IP" + echo " e.g.: $0 /usr/local/sysom 192.168.0.100 120.26.xx.xx" + exit 1 +fi + +APP_HOME=$1 +SERVER_IP=$2 +OUTER_IP=$3 + +export APP_HOME=${APP_HOME} +export SERVER_IP=${SERVER_IP} -if [ $1 ];then - APP_HOME=$1 -fi VIRTUALENV_HOME="${APP_HOME}/virtualenv" TARGET_PATH="${APP_HOME}/target" @@ -24,24 +34,7 @@ if [ "$UID" -ne 0 ]; then exit 1 fi -if [ -d ${APP_HOME} ]; then - read -r -p "${APP_HOME} Already Exists!!! Are You Sure deploy sysom in this path Again? [Y/N] " input - case $input in - [yY][eE][sS]|[yY]) - rm -rf ${APP_HOME} - mkdir -p ${APP_HOME} - ;; - [nN][nO]|[nN]) - exit 0 - ;; - *) - echo "Invalid input..." - exit 1 - ;; - esac -else - mkdir -p ${APP_HOME} -fi +mkdir -p ${APP_HOME} touch_env_rpms() { if [ -f /etc/alios-release ]; then @@ -150,6 +143,24 @@ start_app() { systemctl restart supervisord.service } +start_script_server() { + pushd ${SCRIPT_DIR}/server + bash -x init.sh + popd +} + +start_script_node() { + pushd ${SCRIPT_DIR}/node + bash -x pre_init.sh + popd +} + +modify_grafana_url() { + pushd ${TARGET_PATH}/${WEB_DIR} + sed -i "s/127.0.0.1:3000/${OUTER_IP}\/grafana/g" p__monitor__SystemDashboard*js + popd +} + deploy() { touch_env_rpms touch_virtualenv @@ -157,6 +168,9 @@ deploy() { check_requirements setup_database > ${APP_HOME}/logs/${APP_NAME}_setup_database.log 2>&1 init_conf + start_script_server + start_script_node + modify_grafana_url start_app } diff --git a/tools/deploy/sysom.conf b/tools/deploy/sysom.conf index 216a77e7..ffb731d3 100644 --- a/tools/deploy/sysom.conf +++ b/tools/deploy/sysom.conf @@ -13,12 +13,16 @@ server { gzip_types text/plain text/css text/javascript application/javascript application/json; gzip_vary on; - location ~ /api/ { - proxy_pass http://127.0.0.1:7001; - proxy_read_timeout 180s; - proxy_redirect off; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } + location /grafana/ { + proxy_pass http://localhost:3000/; + } + + location /api/ { + proxy_pass http://127.0.0.1:7001; + proxy_read_timeout 180s; + proxy_redirect off; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } location / { try_files $uri /index.html; -- Gitee