forksand-it-manual/source/resources/apps/sharkfork-bootstrap/TUNING-NOTES

On sf-001 and sf-002 with disk platters:

# XXX: these settings do not survive a reboot.
# sf-001 sf-002
echo 65536 > /sys/block/sdb/queue/nr_requests
echo 65536 > /sys/block/sdc/queue/nr_requests
echo 65536 > /sys/block/sdd/queue/nr_requests

# sf-001 sf-002
echo 8192 > /sys/block/sdb/queue/read_ahead_kb
echo 8192 > /sys/block/sdc/queue/read_ahead_kb
echo 8192 > /sys/block/sdd/queue/read_ahead_kb

# MTU 9000 for Ceph
# sf-001 sf-002
ip link set dev eno2 mtu 9000
ip link set dev enp134s0f1 mtu 9000
# sf-003
ip link set dev eno2 mtu 9000
ip link set dev enp216s0f0 mtu 9000
# sf-004
ip link set dev eno2 mtu 9000
ip link set dev enp216s0f1 mtu 9000
# On all sf-001 sf-002 sf-003 sf-004
# XXX fail
#ip link set dev bond0 mtu 9000
ifconfig bond0 mtu 9000


# Only use 3 mons for Ceph, so remove the mon from sf-001.

# Create 2 OSD per NVMe ?

##############################################################################
# sysctl.conf possibilities from:
# http://tracker.ceph.com/projects/ceph/wiki/Tuning_for_All_Flash_Deployments

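# Recycle and reuse sockets in TIME_WAIT instead of waiting for them to expire.
# NOTE: net.ipv4.tcp_tw_recycle was removed in Linux 4.12 and is known to
# break clients behind NAT; on newer kernels only tcp_tw_reuse applies.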
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_tw_reuse = 1

# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 0

# double amount of allowed conntrack
net.netfilter.nf_conntrack_max = 2621440
net.netfilter.nf_conntrack_tcp_timeout_established = 1800

# Disable netfilter on bridges.
net.bridge.bridge-nf-call-ip6tables = 0
net.bridge.bridge-nf-call-iptables = 0
net.bridge.bridge-nf-call-arptables = 0

# Controls the default maximum size of a message queue, in bytes
kernel.msgmnb = 65536

# Controls the maximum size of a single message, in bytes
kernel.msgmax = 65536

# Controls the maximum shared segment size, in bytes
kernel.shmmax = 68719476736

# Controls the total amount of shared memory available system-wide, in pages
kernel.shmall = 4294967296

##############################################################################

# Increase PG ? Currently at 256.

# Is scrubbing enabled?

##########################
# sample:
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | grep thread

ceph tell osd.* injectargs '--osd_disk_threads 4'
# Resulted in this line for each OSD (16 total):
osd.0: osd_disk_threads = '4' rocksdb_separate_wal_dir = 'false' (not observed, change may require restart)

# http://xiaoquqi.github.io/blog/2015/06/28/ceph-performance-optimization-summary/

# Turn off NUMA. Add to grub kernel boot line:
numa=off
# Run
update-grub
# With NUMA enabled (from dmesg):
[Wed Jul 11 15:52:46 2018] NUMA: Initialized distance table, cnt=2
[Wed Jul 11 15:52:46 2018] NUMA: Node 0 [mem 0x00000000-0x7fffffff] + [mem 0x100000000-0x87fffffff] -> [mem 0x00000000-0x87fffffff]
[Wed Jul 11 15:52:46 2018] mempolicy: Enabling automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl
[Wed Jul 11 15:52:46 2018] pci_bus 0000:00: on NUMA node 0
etc.

# CGroup
# Pin each OSD to a CPU, somehow


# Increase default values in Ceph config


# Create fast pool from command line:
ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
ceph osd crush rule create-replicated fast default host nvme
# And? It didn't appear in the Proxmox interface.
# Then create a pool in the web gui.


# Then do this to have pool use new rule:
#ceph osd pool set nvmepool crush_rule fast
# nope


# Create disk platter crush rule for slow pool from command line:
ceph osd crush rule create-replicated slow default host hdd
# Then add pool with "slow" crush rule in web GUI

# Then add the new pool under Data Center --> Storage. Use rbd, and krbd


##############################
# Change disk encryption password, check disk with crypto, then:
#cryptsetup -y luksAddKey /dev/sdb1
#cryptsetup luksRemoveKey /dev/sdb1

#cryptsetup -y luksAddKey /dev/sda2
#cryptsetup luksRemoveKey /dev/sda2