Centos7安装pbs
2023-12-16
2分钟阅读时长
机器配置
主机名 | ip |
---|---|
master | 192.168.2.1 |
node01 | 192.168.2.2 |
node02 | 192.168.2.3 |
node03 | 192.168.2.4 |
node04 | 192.168.2.5 |
node05 | 192.168.2.6 |
node07 | 192.168.2.7 |
安装
配置好主机名和SSH互相免密
安装依赖
yum -y install libX11-devel libXt-devel libedit-devel libical-devel ncurses-devel postgresql-devel python-devel tcl-devel tk-devel swig expat-devel openssl-devel libXext libXft gcc autoconf automake libtool hwloc-devel rpm-build
pbs github地址
Releases · openpbs/openpbs (github.com)
wget https://github.com/openpbs/openpbs/releases/download/v19.1.3/pbspro_19.1.3.centos_7.zip
unzip pbspro_19.1.3.centos_7.zip
设置环境变量 追加到/etc/profile
export PBS_SERVER=master
export PBS_EXEC=/opt/pbs
export PBS_HOME=/var/spool/pbs
master节点 安装 pbspro-server-19.1.3-0.x86_64.rpm
node节点 安装 pbspro-execution-19.1.3-0.x86_64.rpm
master节点和node节点依次启动pbs
systemctl start pbs
如果想让master节点也参与计算,将master节点 /etc/pbs.conf 中 PBS_START_MOM 的值由0修改为1,然后重启pbs;其他节点的pbs.conf不用改变
配置
master节点其他配置
qmgr -c "set server flatuid=true"
qmgr -c "set server scheduling=true"
qmgr -c "set server job_history_enable=1"
master节点添加节点node01
qmgr -c "create node node01"
查看节点状态
pbsnodes -aSj
master节点创建队列big
qmgr -c "create queue big"
将node07添加到队列big
qmgr -c "set node node07 queue=big"
设置big队列资源限制
qmgr -c "set queue big resources_max.ncpus = 80"
qmgr -c "set queue big resources_max.mem = 503gb"
qmgr -c "set queue big resources_max.nodect = 1"
启用big队列
qmgr -c "set queue big enabled = true"
qmgr -c "set queue big started = true"
查看big队列参数
qstat -Q -f big
注意 普通用户提交作业前需要配置和计算节点间的SSH互相免密
配置gpu队列
管理节点 /var/spool/pbs/sched_priv/sched_config 新增
resources: "ncpus, mem, arch, host, vnode, aoe, eoe, ngpus, gpu_id"
然后重启pbs
service pbs restart
设置下gpu节点的资源
# qmgr
set node node01 resources_available.ncpus=48, resources_available.mem=100gb, resources_available.ngpus=2
创建gpu队列qgpu 并重启
# qmgr
create queue qgpu
set queue qgpu queue_type = Execution
set queue qgpu resources_default.nodect = 1
set queue qgpu resources_default.nodes = node01
set queue qgpu resources_default.ngpus = 2
set queue qgpu resources_default.ncpus = 48
set queue qgpu enabled = True
set queue qgpu started = True
exit
# service pbs restart