
Running Cloudlab Experiments


This is just a template; it will change depending on what kind of setup is desired.

Example runs of installing Ceph and running some experiments on Cloudlab.

nodes

install ceph and ceph-deploy

on all nodes, install the pre-built ceph .deb packages from the project directory

# install the local .deb packages, let apt pull in missing dependencies, then re-run dpkg to finish configuring
sudo apt-get update; sudo dpkg -i /proj/skyhook-PG0/cephbits/*.deb; sudo apt-get install -f -y; sudo dpkg -i /proj/skyhook-PG0/cephbits/*.deb

# or, if using the zlog project directory:
sudo apt-get update; sudo dpkg -i /proj/zlog-PG0/cephbits/*.deb; sudo apt-get install -f -y; sudo dpkg -i /proj/zlog-PG0/cephbits/*.deb
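
If there are many nodes, one way to push the install to every node from the coordination node is a simple ssh loop; the host names below (client0, osd0, osd1, and so on) are placeholders for whatever the CloudLab profile assigns:

# assumption: password-less ssh to each node already works
for host in client0 osd0 osd1 osd2; do
  ssh $host 'sudo apt-get update; sudo dpkg -i /proj/skyhook-PG0/cephbits/*.deb; sudo apt-get install -f -y; sudo dpkg -i /proj/skyhook-PG0/cephbits/*.deb'
done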

install ceph-deploy on a coordination node:

  • set up password-less ssh and sudo from the coordination node to all nodes (including the coordination node itself if it will also run a ceph daemon); see the sudo sketch after the commands below.
sudo apt-get install -y python-virtualenv
mkdir cluster
cd cluster
virtualenv env
env/bin/pip install ceph-deploy
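
On CloudLab, sudo is normally already password-less for the experiment user; if it is not, a minimal sketch for setting it up from the coordination node (node names are placeholders) is:

# grant password-less sudo to the current user on every node
for host in client0 osd0 osd1 osd2; do
  ssh $host "echo \"$USER ALL=(ALL) NOPASSWD:ALL\" | sudo tee /etc/sudoers.d/ceph-deploy"
done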

setup ceph

env/bin/ceph-deploy new client0

edit ceph.conf (created in the cluster directory by ceph-deploy new) to add the following, typically under [global]:

osd pool default size = 1
osd pool default min size = 1
osd crush chooseleaf type = 0
osd pool default pg num = 128
osd pool default pgp num = 128

mon_allow_pool_delete = true
osd_class_load_list = *
osd_class_default_list = *

objecter_inflight_op_bytes = 2147483648

enable experimental unrecoverable data corrupting features = *
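
For reference, ceph-deploy new already creates a [global] section containing the fsid, mon_initial_members, and mon_host lines; the settings above just get appended under that same section. A minimal sketch of the resulting file (the fsid below is a placeholder, the mon address is the one used in this cluster):

[global]
fsid = 00000000-0000-0000-0000-000000000000
mon_initial_members = client0
mon_host = 10.10.1.2
# the settings listed above follow here, still under [global]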

setup and start monitor

env/bin/ceph-deploy mon create-initial
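
If the monitor came up cleanly, its systemd unit should be active on client0 (a quick check, assuming the systemd-managed packages installed earlier):

sudo systemctl status ceph-mon@client0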

what disks do we want to use on osd0

nwatkins@client0:~/cluster$ ssh osd0 lsblk
NAME   MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda      8:0    0   1.1T  0 disk 
├─sda1   8:1    0    16G  0 part /
├─sda2   8:2    0     3G  0 part 
├─sda3   8:3    0     3G  0 part [SWAP]
└─sda4   8:4    0   1.1T  0 part 
sdb      8:16   0   1.1T  0 disk 
sdc      8:32   0 447.1G  0 disk 

we'll use sdc, which is an SSD:

env/bin/ceph-deploy disk zap osd0:sdc
env/bin/ceph-deploy osd create osd0:sdc

OR create the OSD with --bluestore instead of the default FileStore; see the sketch below.
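
The exact osd create syntax depends on which ceph-deploy version pip installed; the forms below are a sketch, so check env/bin/ceph-deploy osd create --help before running them:

# older (1.5.x) host:disk syntax, assuming it accepts --bluestore
env/bin/ceph-deploy osd create --bluestore osd0:sdc

# newer (2.x) syntax, where BlueStore is the default
env/bin/ceph-deploy osd create --data /dev/sdc osd0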

setup perms and disable scrubbing

env/bin/ceph-deploy admin client0
sudo chmod a+r /etc/ceph/ceph.client.admin.keyring
# turn off scrubbing so background scrub I/O doesn't perturb experiment results
ceph osd set noscrub
ceph osd set nodeep-scrub
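
If other nodes should also be able to run ceph commands directly (for example to restart daemons and check status later), the admin keyring can be pushed to them as well; the extra host names here are placeholders:

env/bin/ceph-deploy admin client0 osd0 osd1
ssh osd0 sudo chmod a+r /etc/ceph/ceph.client.admin.keyring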

verify installation

nwatkins@client0:~/cluster$ ceph status
    cluster e6f92ea1-48a6-42f7-8a29-cdf485d8bcb9
     health HEALTH_OK
     monmap e2: 1 mons at {client0=10.10.1.2:6789/0}
            election epoch 4, quorum 0 client0
        mgr active: client0 
     osdmap e5: 1 osds: 1 up, 1 in
            flags sortbitwise,require_jewel_osds,require_kraken_osds
      pgmap v8: 64 pgs, 1 pools, 0 bytes data, 0 objects
            34252 kB used, 441 GB / 441 GB avail
                  64 active+clean

loading data (see the Ceph pg-calc tool for choosing pg counts)

# create pools
ceph osd pool create tpc 256 256 replicated # see pg-calc for values
ceph osd pool set tpc size 1
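
The 256 placement groups follow the usual Ceph rule of thumb of roughly (number of OSDs x 100) / pool size, rounded up to a power of two; a quick way to compute it (the OSD count below is an example value):

# pg_num ~= (num_osds * 100) / pool_size, rounded up to a power of two
osds=2; size=1; want=$(( osds * 100 / size ))
pg=1; while [ $pg -lt $want ]; do pg=$(( pg * 2 )); done
echo $pg   # 256 for 2 OSDs at size 1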

# store data
rados-store-glob.sh tpc nrows1Bnobjs10K/*.bin

# execute query
bin/run-query --query d --build-index --num-objs 10000 --pool tpc --wthreads 24 --qdepth 24
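
rados-store-glob.sh is a Skyhook helper script; if it is not on the PATH, the rough idea is a loop of rados put over the matching files. This sketch is an assumption about its behavior (the real script may name objects differently than run-query expects, so prefer the real script):

# hypothetical stand-in for: rados-store-glob.sh tpc nrows1Bnobjs10K/*.bin
pool=tpc
for f in nrows1Bnobjs10K/*.bin; do
  rados -p $pool put $(basename $f) $f
done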

restarting the ceph cluster (see the Ceph docs)

# view status of ceph services on each machine 
sudo systemctl status ceph\*.service ceph\*.target

# on each osd machine:
sudo systemctl start/stop/restart ceph-osd.target
ps -ef | grep ceph

# on each admin machine:
sudo systemctl start/stop/restart ceph-mon.target

# verify all ok
ceph status
ceph osd tree
ceph osd utilization
ceph osd perf
ceph pg dump summary --format plain
rados df
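
To bounce the whole cluster from the coordination node instead of logging into each machine, a loop like the following can be used (host names are placeholders):

# restart all OSD daemons, then the monitor, then check health
for host in osd0 osd1 osd2; do
  ssh $host sudo systemctl restart ceph-osd.target
done
sudo systemctl restart ceph-mon.target
ceph status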

repart notes

nwatkins@client0:~/cluster$ skyhook-repart --num-objs 10 --obj-size 4 --pool rbd --row-size 192 --generate --nthreads 10
row-size: 192
obj-size: 4194432
num-objs: 10
rows/obj: 21846
seq-start: 0
seq-end: 10
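
These numbers are consistent with --obj-size being interpreted as MiB and rounded up to a whole number of 192-byte rows (an assumption based on the output above):

# 4 MiB of 192-byte rows, rounded up, then converted back to bytes
echo $(( (4 * 1024 * 1024 + 192 - 1) / 192 ))   # 21846 rows/obj
echo $(( 21846 * 192 ))                          # 4194432 bytes obj-size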

nwatkins@client0:~/cluster$ rados -p rbd ls
src.5
src.3
src.4
src.6
src.1
src.7
src.9
src.8
src.2
src.0

nwatkins@client0:~/cluster$ rados -p rbd stat src.0
rbd/src.0 mtime 2017-06-01 17:15:18.000000, size 4194432

nwatkins@client0:~/cluster$ rados -p rbd ls
src.5
src.3
src.5.1496355396.1
src.8.1496355396.0
src.1.1496355396.1
src.5.1496355396.0
src.4
src.7.1496355396.1
src.9.1496355396.1
src.6
src.1
src.0.1496355396.0
src.8.1496355396.1
src.7
src.9
src.8
src.1.1496355396.0
src.7.1496355396.0
src.0.1496355396.1
src.2
src.9.1496355396.0
src.0
src.6.1496355396.0
src.3.1496355396.1
src.6.1496355396.1
src.3.1496355396.0
src.2.1496355396.0
src.4.1496355396.1
src.4.1496355396.0
src.2.1496355396.1

nwatkins@client0:~/cluster$ rados -p rbd stat src.2.1496355396.1
rbd/src.2.1496355396.1 mtime 2017-06-01 17:16:37.000000, size 2097216

The split objects appear to be named src.<seq>.<epoch>.<part>, and each 4194432-byte source object is split into two 2097216-byte halves. Re-running with --split over a seq range re-splits only those source objects, adding a new epoch suffix:

nwatkins@client0:~/cluster$ skyhook-repart --num-objs 10 --obj-size 4 --pool rbd --row-size 192 --split --nthreads 10 --seq-start 5 --seq-end 10
row-size: 192
obj-size: 4194432
num-objs: 10
rows/obj: 21846
seq-start: 5
seq-end: 10

nwatkins@client0:~/cluster$ rados -p rbd ls | grep src.5\.
src.5.1496355396.1
src.5.1496355396.0
src.5.1496355467.1
src.5.1496355467.0
nwatkins@client0:~/cluster$ rados -p rbd ls | grep src.6\.
src.6.1496355467.0
src.6.1496355467.1
src.6.1496355396.0
src.6.1496355396.1
nwatkins@client0:~/cluster$ rados -p rbd ls | grep src.2\.
src.2.1496355396.0
src.2.1496355396.1