Some more Proxmox/Ceph tuning notes

On sf-001 and sf-002 (the nodes with spinning platter disks):
# XXX Does not survive reboot -- see the udev sketch below XXX
# sf-001 sf-002
echo 65536 > /sys/block/sdb/queue/nr_requests
echo 65536 > /sys/block/sdc/queue/nr_requests
echo 65536 > /sys/block/sdd/queue/nr_requests
# sf-001 sf-002
echo 8192 > /sys/block/sdb/queue/read_ahead_kb
echo 8192 > /sys/block/sdc/queue/read_ahead_kb
echo 8192 > /sys/block/sdd/queue/read_ahead_kb
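# One way to persist these (a sketch, assuming udev and that sdb-sdd keep their names):
# /etc/udev/rules.d/60-ceph-hdd-tuning.rules
ACTION=="add|change", KERNEL=="sd[b-d]", ATTR{queue/nr_requests}="65536", ATTR{queue/read_ahead_kb}="8192"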
# MTU 9000 for Ceph
# sf-001 sf-002
ip link set dev eno2 mtu 9000
ip link set dev enp134s0f1 mtu 9000
# sf-003
ip link set dev eno2 mtu 9000
ip link set dev enp216s0f0 mtu 9000
# sf-004
ip link set dev eno2 mtu 9000
ip link set dev enp216s0f1 mtu 9000
# On all sf-001 sf-002 sf-003 sf-004
# XXX ip link on bond0 failed; ifconfig worked XXX
#ip link set dev bond0 mtu 9000
ifconfig bond0 mtu 9000
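# To persist MTU 9000 (a sketch, assuming the stock Proxmox /etc/network/interfaces layout),
# add "mtu 9000" to each relevant iface stanza, e.g.:
#   iface eno2 inet manual
#       mtu 9000
# Verify jumbo frames end-to-end with a non-fragmenting ping (8972 = 9000 minus 28 bytes of headers):
ping -M do -s 8972 <peer-ip>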
# Only use 3 mons for Ceph, so remove the mon from sf-001
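# e.g. (a sketch; assumes the mon id matches the hostname):
ceph mon remove sf-001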
# Create 2 OSDs per NVMe?
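# One way (a sketch, assuming ceph-volume is available and this device name):
ceph-volume lvm batch --osds-per-device 2 /dev/nvme0n1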
##############################################################################
# sysctl.conf possibilities from:
# http://tracker.ceph.com/projects/ceph/wiki/Tuning_for_All_Flash_Deployments
# Recycle/reuse sockets in TIME_WAIT (note: tcp_tw_recycle no longer exists in kernels >= 4.12)
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_tw_reuse = 1
# Controls the use of TCP syncookies
net.ipv4.tcp_syncookies = 0
# Double the allowed conntrack entries
net.netfilter.nf_conntrack_max = 2621440
net.netfilter.nf_conntrack_tcp_timeout_established = 1800
# Disable netfilter on bridges.
net.bridge.bridge-nf-call-ip6tables = 0
net.bridge.bridge-nf-call-iptables = 0
net.bridge.bridge-nf-call-arptables = 0
# Controls the default maximum size of a message queue, in bytes
kernel.msgmnb = 65536
# Controls the maximum size of a message, in bytes
kernel.msgmax = 65536
# Controls the maximum shared segment size, in bytes
kernel.shmmax = 68719476736
# Controls the maximum total amount of shared memory, in pages
kernel.shmall = 4294967296
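# To apply: drop the settings above into e.g. /etc/sysctl.d/90-ceph-tuning.conf and load with:
sysctl --system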
##############################################################################
# Increase PGs? Currently at 256.
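# One way to check and bump it (a sketch; pool name assumed, pgp_num should follow pg_num):
ceph osd pool get nvmepool pg_num
ceph osd pool set nvmepool pg_num 512
ceph osd pool set nvmepool pgp_num 512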
# Is scrubbing enabled?
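# One way to check (same admin socket style as the sample below):
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | grep scrub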
##########################
# sample:
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | grep thread
ceph tell osd.* injectargs '--osd_disk_threads 4'
# Resulted in this line for each OSD (16 total):
osd.0: osd_disk_threads = '4' rocksdb_separate_wal_dir = 'false' (not observed, change may require restart)
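# To persist the change (one way), add it under [osd] in /etc/ceph/ceph.conf and restart the OSDs:
[osd]
osd_disk_threads = 4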
# http://xiaoquqi.github.io/blog/2015/06/28/ceph-performance-optimization-summary/
# Turn off NUMA. Add to grub kernel boot line:
numa=off
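# e.g. (assuming the stock Debian/Proxmox grub config) in /etc/default/grub:
GRUB_CMDLINE_LINUX_DEFAULT="quiet numa=off"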
# Run
update-grub
# With NUMA enabled (from dmesg):
[Wed Jul 11 15:52:46 2018] NUMA: Initialized distance table, cnt=2
[Wed Jul 11 15:52:46 2018] NUMA: Node 0 [mem 0x00000000-0x7fffffff] + [mem 0x100000000-0x87fffffff] -> [mem 0x00000000-0x87fffffff]
[Wed Jul 11 15:52:46 2018] mempolicy: Enabling automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl
[Wed Jul 11 15:52:46 2018] pci_bus 0000:00: on NUMA node 0
etc.
# CGroup
# Pin each OSD to a CPU, somehow
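# One possible approach (a sketch, not tested here): a per-instance systemd drop-in using CPUAffinity.
# e.g. create /etc/systemd/system/ceph-osd@0.service.d/cpu-pin.conf containing:
[Service]
CPUAffinity=0 1
# then:
systemctl daemon-reload && systemctl restart ceph-osd@0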
# Increase default values in Ceph config
# Create fast pool from command line:
ceph osd crush rule create-replicated <rule-name> <root> <failure-domain> <class>
ceph osd crush rule create-replicated fast default host nvme
# and? Didn't appear in the Proxmox interface
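# To confirm the rule and the nvme device class exist (a sketch):
ceph osd crush rule ls
ceph osd crush class ls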
# Then create a pool in the web gui.
# Then do this to have pool use new rule:
ceph osd pool set nvmepool crush_rule fast
# nope
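# To check which rule the pool actually uses (a sketch; pool name as above):
ceph osd pool get nvmepool crush_rule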
