Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Simple Shell script to coordinate a cluster

  • This small script is used on the FIZ cluster (Ubuntu 12.04 LTS) to push configurations and binaries and to start, stop, restart and purge the cluster. To make this work without having to enter passwords for every sudo and ssh call on the cluster nodes, I distributed a public ssh key to the cluster nodes for ssh authentication and allowed the fcrepo user to execute rm, cp and service calls via sudo without a password.
  • The configuration of the FIZ cluster can be accessed here: https://github.com/fasseg/fiz-fcrepo-cluster-config. There is also a setenv.sh file in there which we symlinked to $TOMCAT_HOME/bin/setenv.sh, that sets the environment variables for the repository, jgroups and infinispan configuration.
  • So on each node the layout on the file system looks like this:
    • /data/fcrepo (the exploded war file, owned by fcrepo)
    • /home/fcrepo/fiz-cluster-config (the configuration and setenv.sh file, owned by fcrepo)
    • /var/lib/tomcat7/webapps/fedora (owned by root) symlinks to /data/fcrepo 
  • Using this setup, JAR updates can be pushed by the shell script directly to /data/fcrepo/WEB-INF/lib.
  • Pushing a new WAR file to the nodes requires unpacking the WAR to /data/fcrepo. The unpacked files are staged in a temporary directory first, so access to /tmp is required.
  • The script is set up for six nodes with known IPs. So the nodes[] array will have to change for different configurations, as should any hard-coded node range in the for statements of start_cluster() and stop_cluster() (purge() iterates over the nodes[] array directly).

Code Block
languagebash
titlecluster
collapsetrue
#!/bin/bash
# IP addresses of the cluster nodes. Node number N given on the command line
# (1-based) refers to element N-1 of this array.
nodes=(
	192.168.42.101
	192.168.42.102
	192.168.42.103
	192.168.42.104
	192.168.42.105
	192.168.42.106
)
#######################################
# Start Tomcat on one cluster node and wait until its REST endpoint answers.
# Globals:   nodes (read)
# Arguments: $1 - 1-based node number (element $1-1 of nodes)
# Outputs:   progress on stdout, diagnostics on stderr
# Returns:   0 once http://<node>:8080/fcrepo/rest responds successfully,
#            1 if ssh itself could not reach the node.
#            Exits the script with status 1 on a missing or invalid argument.
#######################################
start_node() {
	local nodeip ssh_rc=0
	if [[ -z "$1" ]]; then
		echo "No node argument supplied [1-${#nodes[@]}]" >&2
		exit 1
	fi
	# Only accept a real 1-based index. Without this check, 0 or a non-numeric
	# value evaluates to index -1 (the last node) and an out-of-range number
	# yields an empty address.
	if [[ ! "$1" =~ ^[1-9][0-9]*$ ]] || (( $1 > ${#nodes[@]} )); then
		echo "Invalid node argument '$1' [1-${#nodes[@]}]" >&2
		exit 1
	fi
	nodeip=${nodes[$1 - 1]}
	printf 'Starting node %s...' "$nodeip"
	ssh -qt "fcrepo@${nodeip}" "sudo service tomcat7 start > /dev/null" || ssh_rc=$?
	# ssh reserves exit status 255 for its own errors (e.g. connection failure);
	# polling would never succeed in that case, so give up instead of hanging.
	if (( ssh_rc == 255 )); then
		echo "FAILED"
		echo "ssh could not reach ${nodeip}" >&2
		return 1
	fi
	# Poll until the repository answers; pause between attempts so the loop
	# does not spin at full speed while Tomcat is booting.
	until curl -sf "http://${nodeip}:8080/fcrepo/rest" > /dev/null; do
		sleep 1
	done
	echo "done."
}
#######################################
# Stop Tomcat on one cluster node and wait until its REST endpoint stops
# answering.
# Globals:   nodes (read)
# Arguments: $1 - 1-based node number (element $1-1 of nodes)
# Outputs:   progress on stdout, diagnostics on stderr
# Returns:   0 once http://<node>:8080/fcrepo/rest no longer responds
#            successfully, 1 if ssh itself could not reach the node.
#            Exits the script with status 1 on a missing or invalid argument.
#######################################
stop_node() {
	local nodeip ssh_rc=0
	if [[ -z "$1" ]]; then
		echo "No node argument supplied [1-${#nodes[@]}]" >&2
		exit 1
	fi
	# Only accept a real 1-based index. Without this check, 0 or a non-numeric
	# value evaluates to index -1 (the last node) and an out-of-range number
	# yields an empty address.
	if [[ ! "$1" =~ ^[1-9][0-9]*$ ]] || (( $1 > ${#nodes[@]} )); then
		echo "Invalid node argument '$1' [1-${#nodes[@]}]" >&2
		exit 1
	fi
	nodeip=${nodes[$1 - 1]}
	printf 'Stopping node %s...' "$nodeip"
	ssh -qt "fcrepo@${nodeip}" "sudo service tomcat7 stop > /dev/null" || ssh_rc=$?
	# ssh reserves exit status 255 for its own errors (e.g. connection failure);
	# the stop command never ran, so do not claim the node was stopped.
	if (( ssh_rc == 255 )); then
		echo "FAILED"
		echo "ssh could not reach ${nodeip}" >&2
		return 1
	fi
	# Poll while the repository still answers; pause between attempts so the
	# loop does not spin at full speed while Tomcat shuts down.
	while curl -sf "http://${nodeip}:8080/fcrepo/rest" > /dev/null; do
		sleep 1
	done
	echo "done."
}
#######################################
# Restart a single node: stop it, then start it again.
# Arguments: $1 - node number, passed through unchanged to stop_node and
#                 start_node
# Returns:   status of start_node, or of stop_node if stopping failed
#######################################
restart_node() {
	# Quote the argument so it reaches the helpers as exactly one word, and
	# skip the start when the stop step reported a failure.
	stop_node "$1" && start_node "$1"
}
#######################################
# Start every node of the cluster, one after another.
# Globals:   nodes (read; its length determines how many nodes are started)
# Outputs:   "Starting cluster" followed by whatever start_node prints
# Returns:   status of the last start_node call
#######################################
start_cluster() {
	local node
	echo "Starting cluster"
	# Node numbers are 1-based; derive the range from the array so adding or
	# removing entries in nodes needs no change here.
	for (( node = 1; node <= ${#nodes[@]}; node++ )); do
		start_node "$node"
	done
}
#######################################
# Stop every node of the cluster, one after another.
# Globals:   nodes (read; its length determines how many nodes are stopped)
# Outputs:   "Stopping cluster" followed by whatever stop_node prints
# Returns:   status of the last stop_node call
#######################################
stop_cluster() {
	local node
	echo "Stopping cluster"
	# Node numbers are 1-based; derive the range from the array so adding or
	# removing entries in nodes needs no change here.
	for (( node = 1; node <= ${#nodes[@]}; node++ )); do
		stop_node "$node"
	done
}
# Restart the whole cluster: run stop_cluster, then start_cluster.
# The return status is that of start_cluster.
restart_cluster() {
	local phase
	for phase in stop_cluster start_cluster; do
		"$phase"
	done
}
#######################################
# Report for every node whether its REST endpoint answers.
# Globals:   nodes (read)
# Outputs:   a header line, then "<ip> is online" or "<ip> is OFFLINE" per node
# Returns:   0
#######################################
status() {
	# Keep the loop variable local so the caller's nodeip is not overwritten.
	local nodeip
	echo "Status of FIZ Fedora 4 cluster"
	for nodeip in "${nodes[@]}"; do
		# curl -f turns HTTP error responses into a non-zero exit status, so a
		# node counts as online only when the request succeeds.
		if curl -sf "http://${nodeip}:8080/fcrepo/rest" > /dev/null; then
			echo "$nodeip is online"
		else
			echo "$nodeip is OFFLINE"
		fi
	done
}
#######################################
# Delete the repository data and Tomcat's Catalina work files on every node.
# Globals:   nodes (read)
# Outputs:   per-node progress on stdout, ending in "done" or "FAILED"
# Returns:   0 if the remote command succeeded on every node, 1 otherwise
#######################################
purge() {
	local nodeip rc=0
	echo "purging cluster"
	for nodeip in "${nodes[@]}"; do
		printf 'purging %s...' "$nodeip"
		# The globs are inside the quoted command, so they are expanded on the
		# remote host. Report the real outcome rather than an unconditional
		# "done".
		if ssh -qt "fcrepo@${nodeip}" "sudo rm -Rf /var/lib/tomcat7/fcrepo4-data/* /var/lib/tomcat7/work/Catalina/localhost/*"; then
			echo "done"
		else
			echo "FAILED"
			rc=1
		fi
	done
	return "$rc"
}
#######################################
# Copy one configuration file to every node.
# Globals:   nodes (read)
# Arguments: $1 - local path of the configuration file
# Outputs:   one "pushing config file ..." line per node on stdout
# Returns:   0 if every copy succeeded, 1 otherwise.
#            Exits the script with status 1 when no file is given.
#######################################
push_config() {
	local nodeip rc=0
	if [[ -z "$1" ]]; then
		echo "No config file argument supplied" >&2
		exit 1
	fi
	for nodeip in "${nodes[@]}"; do
		# The file is this function's first argument; the message previously
		# printed $2, which is never passed, so the name was always blank.
		echo "pushing config file $1 to $nodeip"
		# NOTE(review): the destination is relative to the fcrepo user's home
		# directory; confirm it is the directory the nodes read their
		# configuration from.
		scp "$1" "fcrepo@${nodeip}:fcrepo-config/" || rc=1
	done
	return "$rc"
}
# Stop the cluster, purge every node, then start the cluster again.
# The return status is that of start_cluster.
restart_purge() {
	local step
	for step in stop_cluster purge start_cluster; do
		"$step"
	done
}
#######################################
# Deploy a WAR file to every node as an exploded directory under
# /opt/fcrepo. The cluster is stopped and purged first and is NOT restarted.
# Globals:   nodes (read)
# Arguments: $1 - local path of the WAR file
# Outputs:   per-node progress on stdout, diagnostics on stderr
# Returns:   0 if every node was updated, 1 if any node failed.
#            Exits the script with status 1 if the WAR is missing or cannot
#            be unpacked; in that case the cluster is left untouched.
#######################################
push_war() {
	local nodeip staging unzip_rc=0 rc=0
	# Validate and unpack BEFORE stopping and purging, so a bad path or a
	# corrupt archive does not take the cluster down and wipe its data.
	if [[ -z "$1" || ! -f "$1" ]]; then
		echo "WAR file '$1' not found" >&2
		exit 1
	fi
	# Stage in a private temporary directory instead of a fixed /tmp/fcrepo.
	# The subdirectory is named fcrepo because scp -r recreates the source
	# directory name on the remote side, where /tmp/fcrepo is expected.
	staging=$(mktemp -d) || exit 1
	mkdir "${staging}/fcrepo"
	unzip -qq "$1" -d "${staging}/fcrepo" || unzip_rc=$?
	# unzip uses exit status 1 for warnings with extraction still completed;
	# only higher values are treated as failure.
	if (( unzip_rc > 1 )); then
		echo "Could not unpack WAR file '$1'" >&2
		rm -rf -- "${staging:?}"
		exit 1
	fi
	stop_cluster
	purge
	for nodeip in "${nodes[@]}"; do
		printf 'pushing WAR file to %s...' "$nodeip"
		# Chain the steps so the old deployment is only removed once the new
		# files have actually arrived on the node.
		if ssh -qt "fcrepo@${nodeip}" "sudo rm -Rf /tmp/fcrepo" \
			&& scp -qr "${staging}/fcrepo" "fcrepo@${nodeip}:/tmp" \
			&& ssh -qt "fcrepo@${nodeip}" "sudo rm -Rf /opt/fcrepo/*" \
			&& ssh -qt "fcrepo@${nodeip}" "sudo mv /tmp/fcrepo /opt/"; then
			echo "done."
		else
			echo "FAILED"
			rc=1
		fi
	done
	rm -rf -- "${staging:?}"
	return "$rc"
}
#######################################
# Copy a JAR file into /opt/fcrepo/WEB-INF/lib on every node. The cluster is
# stopped and purged first and is NOT restarted.
# Globals:   nodes (read)
# Arguments: $1 - local path of the JAR file
# Outputs:   per-node progress on stdout, diagnostics on stderr
# Returns:   0 if every copy succeeded, 1 if any node failed.
#            Exits the script with status 1 if the JAR is missing; in that
#            case the cluster is left untouched.
#######################################
push_jar() {
	local nodeip rc=0
	# Check the file BEFORE stopping and purging, so a mistyped path does not
	# take the cluster down and wipe its data for nothing.
	if [[ -z "$1" || ! -f "$1" ]]; then
		echo "JAR file '$1' not found" >&2
		exit 1
	fi
	stop_cluster
	purge
	for nodeip in "${nodes[@]}"; do
		printf 'pushing JAR to %s...' "$nodeip"
		if scp -q "$1" "fcrepo@${nodeip}:/opt/fcrepo/WEB-INF/lib"; then
			echo "done."
		else
			echo "FAILED"
			rc=1
		fi
	done
	return "$rc"
}

# Command-line dispatch: $1 selects the action; $2 is the node number (for
# the *-node actions) or the file to push (for the push-* actions). It is
# passed quoted so that paths containing spaces or glob characters reach the
# function as a single, unmodified argument.
case "$1" in
	start)
		start_cluster
		;;
	stop)
		stop_cluster
		;;
	restart)
		restart_cluster
		;;
	stop-node)
		stop_node "$2"
		;;
	start-node)
		start_node "$2"
		;;
	restart-node)
		restart_node "$2"
		;;
	status)
		status
		;;
	purge)
		purge
		;;
	push-config)
		push_config "$2"
		;;
	restart-purge)
		restart_purge
		;;
	push-war)
		push_war "$2"
		;;
	push-jar)
		push_jar "$2"
		;;
	*)
		# Unknown or missing action: show usage on stderr and fail.
		echo "Usage: $0 {start|stop|restart|restart-purge|start-node|stop-node|restart-node|status|purge|push-config|push-war|push-jar}" >&2
		exit 1
		;;
esac