|
|
|
On sf-001 and sf-002 with disk platters:
|
|
|
|
|
|
|
|
# XXX: the settings below do not survive a reboot
|
|
|
|
# sf-001 sf-002
|
|
|
|
echo 65536 > /sys/block/sdb/queue/nr_requests
|
|
|
|
echo 65536 > /sys/block/sdc/queue/nr_requests
|
|
|
|
echo 65536 > /sys/block/sdd/queue/nr_requests
|
|
|
|
|
|
|
|
# sf-001 sf-002
|
|
|
|
echo 8192 > /sys/block/sdb/queue/read_ahead_kb
|
|
|
|
echo 8192 > /sys/block/sdc/queue/read_ahead_kb
|
|
|
|
echo 8192 > /sys/block/sdd/queue/read_ahead_kb
|
|
|
|
|
|
|
|
# MTU 9000 for Ceph
|
|
|
|
# sf-001 sf-002
|
|
|
|
ip link set dev eno2 mtu 9000
|
|
|
|
ip link set dev enp134s0f1 mtu 9000
|
|
|
|
# sf-003
|
|
|
|
ip link set dev eno2 mtu 9000
|
|
|
|
ip link set dev enp216s0f0 mtu 9000
|
|
|
|
# sf-004
|
|
|
|
ip link set dev eno2 mtu 9000
|
|
|
|
ip link set dev enp216s0f1 mtu 9000
|
|
|
|
# On all sf-001 sf-002 sf-003 sf-004
|
|
|
|
# XXX fail
|
|
|
|
#ip link set dev bond0 mtu 9000
|
|
|
|
ifconfig bond0 mtu 9000
|
|
|
|
|
|
|
|
|
|
|
|
# Only use 3 mons for Ceph, so remove the mon from sf-001
|
|
|
|
|
|
|
|
# Create 2 OSDs per NVMe?
|
|
|
|
|
|
|
|
##############################################################################
|
|
|
|
# sysctl.conf possibilities from:
|
|
|
|
# http://tracker.ceph.com/projects/ceph/wiki/Tuning_for_All_Flash_Deployments
|
|
|
|
|
|
|
|
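# Recycle/reuse sockets in TIME_WAIT instead of waiting for them to expire.
# NOTE: net.ipv4.tcp_tw_recycle was removed in Linux 4.12; on newer kernels that key no longer exists.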
|
|
|
|
net.ipv4.tcp_tw_recycle = 1
|
|
|
|
net.ipv4.tcp_tw_reuse = 1
|
|
|
|
|
|
|
|
# Controls the use of TCP syncookies
|
|
|
|
net.ipv4.tcp_syncookies = 0
|
|
|
|
|
|
|
|
# Double the maximum number of tracked connections (conntrack)
|
|
|
|
net.netfilter.nf_conntrack_max = 2621440
|
|
|
|
net.netfilter.nf_conntrack_tcp_timeout_established = 1800
|
|
|
|
|
|
|
|
# Disable netfilter on bridges.
|
|
|
|
net.bridge.bridge-nf-call-ip6tables = 0
|
|
|
|
net.bridge.bridge-nf-call-iptables = 0
|
|
|
|
net.bridge.bridge-nf-call-arptables = 0
|
|
|
|
|
|
|
|
# Controls the default maximum size of a message queue, in bytes
|
|
|
|
kernel.msgmnb = 65536
|
|
|
|
|
|
|
|
# Controls the maximum size of a single message, in bytes
|
|
|
|
kernel.msgmax = 65536
|
|
|
|
|
|
|
|
# Controls the maximum shared segment size, in bytes
|
|
|
|
kernel.shmmax = 68719476736
|
|
|
|
|
|
|
|
# Controls the total amount of shared memory available system-wide, in pages
|
|
|
|
kernel.shmall = 4294967296
|
|
|
|
|
|
|
|
##############################################################################
|
|
|
|
|
|
|
|
# Increase PG count? Currently at 256.
|
|
|
|
|
|
|
|
# Is scrubbing enabled?
|
|
|
|
|
|
|
|
##########################
|
|
|
|
# sample:
|
|
|
|
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | grep thread
|
|
|
|
|
|
|
|
ceph tell osd.* injectargs '--osd_disk_threads 4'
|
|
|
|
# Resulted in this line for each OSD (16 total):
|
|
|
|
osd.0: osd_disk_threads = '4' rocksdb_separate_wal_dir = 'false' (not observed, change may require restart)
|
|
|
|
|
|
|
|
# http://xiaoquqi.github.io/blog/2015/06/28/ceph-performance-optimization-summary/
|
|
|
|
|
|
|
|
# Turn off NUMA. Add to grub kernel boot line:
|
|
|
|
numa=off
|
|
|
|
# Run
|
|
|
|
update-grub
|
|
|
|
# With NUMA enabled (from dmesg):
|
|
|
|
[Wed Jul 11 15:52:46 2018] NUMA: Initialized distance table, cnt=2
|
|
|
|
[Wed Jul 11 15:52:46 2018] NUMA: Node 0 [mem 0x00000000-0x7fffffff] + [mem 0x100000000-0x87fffffff] -> [mem 0x00000000-0x87fffffff]
|
|
|
|
[Wed Jul 11 15:52:46 2018] mempolicy: Enabling automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl
|
|
|
|
[Wed Jul 11 15:52:46 2018] pci_bus 0000:00: on NUMA node 0
|
|
|
|
etc.
|
|
|
|
|
|
|
|
# CGroup
|
|
|
|
# Pin each OSD to a CPU, somehow
|
|
|
|
|
|
|
|
|
|
|
|
# Increase default values in Ceph config
|
|
|
|
|
|
|
|
|
|
|
|
# Create fast pool from command line:
|
|
|
|
ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
|
|
|
|
ceph osd crush rule create-replicated fast default host nvme
|
|
|
|
# And then? The rule didn't appear in the Proxmox interface.
|
|
|
|
# Then create a pool in the web gui.
|
|
|
|
|
|
|
|
|
|
|
|
# Then do this to have pool use new rule:
|
|
|
|
ceph osd pool set nvmepool crush_rule fast
|
|
|
|
# nope
|