#!/usr/pythoncontroller/python
import sys
import glob
import re
import os
import time
from subprocess import *
from threading import Thread, current_thread
from Queue import Queue
from signal import *

# Kernel base strings and minimum version numbers, one pair per platform
# (KVM, Xen 3, Xen 4).
# NOTE(review): none of these six constants is referenced anywhere in the
# visible part of this file; presumably kept for other tooling - confirm
# before removing.
KVM_KERNEL_BASE = "2.6.32"
MIN_KVM_VERSION = 279

XEN3_KERNEL_BASE = "2.6.18"
MIN_XEN3_VERSION = 308

XEN4_KERNEL_BASE = "3.18.25"
MIN_XEN4_VERSION = 18

# Optional static list of HV IP addresses. When non-empty, getHVIPs() returns
# this list instead of parsing the dhcpd configuration file.
HOSTS = []

def usage():
    """Print the command-line synopsis to stdout and exit with status 0."""
    synopsis = (
        "Usage:",
        "\tliveUpdate listHVs [-nc]",
        "\tliveUpdate updateToolstack <HV IP Addr> force",
        "\tliveUpdate restartControllers <HV IP Addr> force <timeout_in_secs> <Controller ID>",
        "\tliveUpdate refreshControllers <HV IP Addr>",
        "\tliveUpdate updateDrivers <HV IP Addr>",
        "\tliveUpdate liveRestartBackends <HV IP Addr>",
    )
    for entry in synopsis:
        print(entry)
    sys.exit(0)

def getFormat(message):
    """Prefix *message* with a UTC timestamp and the current thread's name.

    The result has the form
    ``DD-MM-YYYY-HH:MM:SS : <thread name> : <message>``.
    """
    stamp = time.strftime('%d-%m-%Y-%H:%M:%S ', time.gmtime())
    origin = ': %s : ' % current_thread().name
    return stamp + origin + message

def testPID(pid):
    try:
        os.kill(pid,0)
    except:
        return False
    return True

def filetransfer(src,dst,ip):
    """Recursively copy *src* to path *dst* on host *ip* using scp.

    When scp exits with a non-zero status, its captured stdout/stderr are
    printed and the program terminates with exit status 1.
    """
    target = "%s:%s" % (ip, dst)
    scp = Popen(["scp", "-r", src, target], shell=False, stdout=PIPE, stderr=PIPE)
    out, err = scp.communicate()

    if scp.returncode == 0:
        return
    print("Filetransfer failed! (%s,%s)" % (out, err))
    sys.exit(1)

def remoteExec(cmd, ip, notCheckHost = False):
    """Run the argv list *cmd* on host *ip* through ssh.

    When *notCheckHost* is True, ssh is invoked with strict host key checking
    disabled. The command is re-run, up to 20 attempts in total, for as long
    as ssh's return code is -11 (subprocess reports a child killed by
    signal 11 that way).

    Returns a ``(stdout, stderr, returncode)`` tuple from the last attempt.
    """
    if notCheckHost == True:
        prefix = ["ssh", "-o StrictHostKeyChecking no", ip]
    else:
        prefix = ["ssh", ip]
    sshArgv = prefix + cmd

    for attempt in range(20):
        child = Popen(sshArgv, shell=False, stdout=PIPE, stderr=PIPE)
        out, err = child.communicate()
        if child.returncode != -11:
            break
        print("remoteExec - retrying command %s, attempt: %d" % (cmd, attempt))
    return out, err, child.returncode

def localExec(cmd):
    """Run the argv list *cmd* on the local machine without a shell.

    Returns a ``(stdout, stderr, returncode)`` tuple.
    """
    child = Popen(cmd, shell=False, stdout=PIPE, stderr=PIPE)
    captured = child.communicate()
    return captured[0], captured[1], child.returncode

def getHostid(ip):
    """Read the ``hostid`` setting from /.rw/onappstore.conf on host *ip*.

    Returns the text after the first ``=`` of the first line that starts with
    ``hostid`` (up to the next ``=``, if any), or None when no such line
    exists.
    """
    out, err, ret = remoteExec(['cat', '/.rw/onappstore.conf'], ip)
    for entry in out.split('\n'):
        if not entry.startswith('hostid'):
            continue
        return entry.split('=')[1]
    return None

def _isAlive(ip, notCheckHost = False):
    """Return True when ``true`` can be run on host *ip* over ssh.

    *notCheckHost* is forwarded to remoteExec() to disable strict host key
    checking.
    """
    out, err, ret = remoteExec(['true'], ip, notCheckHost)
    return ret == 0

def _isCentos6(ip):
    """Return True when /etc/redhat-release on host *ip* reports a 6.x release.

    The version is read from the third whitespace-separated field of the
    file (presumably the "CentOS release 6.x (Final)" layout - confirm).
    Returns False when the output is empty or too short to contain that
    field, e.g. when the file is missing or the ssh call failed.
    """
    out, err, ret = remoteExec(['cat', '/etc/redhat-release'], ip)
    fields = out.split()
    # Guard the index: an unreachable host or missing file yields no fields.
    if len(fields) < 3:
        return False
    return fields[2].startswith('6')

def _isCentos7(ip):
    """Return True when /etc/redhat-release on host *ip* reports a 7.x release.

    The version is read from the fourth whitespace-separated field of the
    file (presumably the "CentOS Linux release 7.x (Core)" layout - confirm).
    Returns False when the output is empty or too short to contain that
    field, e.g. when the file is missing or the ssh call failed.
    """
    out, err, ret = remoteExec(['cat', '/etc/redhat-release'], ip)
    fields = out.split()
    # Guard the index: an unreachable host or missing file yields no fields.
    if len(fields) < 4:
        return False
    return fields[3].startswith('7')

def _needsToolstackRestart(ip):
    """Return True when the packaged and running toolstack versions differ.

    Runs ``diff -q`` on host *ip* between package-version.txt and
    toolstackversion.txt; any non-zero exit status (files differ, a file is
    missing, or the remote call failed) counts as "restart needed".
    """
    versionFiles = ['/onappstore/package-version.txt', '/onappstore/toolstackversion.txt']
    out, err, ret = remoteExec(['diff', '-q'] + versionFiles, ip)
    return ret != 0

def _needsControllerRestart(ip):
    """Return True when the packaged and running controller versions differ.

    Runs ``diff -q`` on host *ip* between package-version.txt and
    controllerversion.txt; any non-zero exit status (files differ, a file is
    missing, or the remote call failed) counts as "restart needed".
    """
    versionFiles = ['/onappstore/package-version.txt', '/onappstore/controllerversion.txt']
    out, err, ret = remoteExec(['diff', '-q'] + versionFiles, ip)
    return ret != 0

def _isXenHV(ip):
    """Return True when ``xm info`` runs successfully on host *ip*."""
    out, err, ret = remoteExec(['xm', 'info'], ip)
    return ret == 0

# returns 3 or 4 
def findXenVersion(ip):
    """Return the Xen major version of host *ip* as an int.

    The value is parsed from the ``xen_major`` line of ``xm info`` output.
    Callers in this file only handle the values 3 and 4. Returns None when
    the output contains no ``xen_major`` line.
    """
    out, err, ret = remoteExec(['xm', 'info'], ip)
    for entry in out.strip().split('\n'):
        if not entry.startswith('xen_major'):
            continue
        # Drop all whitespace so "xen_major   : 4" becomes "xen_major:4".
        compact = re.sub(r'\s+', '', entry)
        return int(compact.split(':')[1])
    return None

def _isKVMHV(ip):
    """Return True when ``lsmod`` on host *ip* lists a module starting with ``kvm``."""
    out, err, ret = remoteExec(['lsmod'], ip)
    loaded = out.split('\n')
    return any(entry.startswith('kvm') for entry in loaded)

def getHVIPs():
    """Return the list of hypervisor IP addresses.

    When the module-level HOSTS list is non-empty it is returned as-is.
    Otherwise the addresses are collected from the ``fixed-address`` entries
    of /onapp/configuration/dhcp/dhcpd.conf, falling back to the pre-4.0
    location /home/onapp/dhcpd.conf when the former does not exist.

    Raises IOError when the selected configuration file cannot be opened.
    """
    if len(HOSTS):
        return HOSTS

    # first try with the new dhcp conf file path
    path = "/onapp/configuration/dhcp/dhcpd.conf"
    if not os.path.exists(path):
        # if not present, set to the pre-4.0 path
        path = "/home/onapp/dhcpd.conf"

    iplist = []
    # "with" guarantees the file is closed even if parsing raises.
    with open(path, 'r') as fd:
        for raw in fd:
            entry = raw.strip()
            if not entry.startswith("fixed-address"):
                continue
            fields = entry.split()
            if len(fields) < 2:
                # Malformed entry with no address; skip instead of crashing.
                continue
            # Strip only the statement terminator, rather than blindly
            # chopping the last character of the line.
            iplist.append(fields[1].rstrip(';'))
    return iplist

def parse_output(out,err,code):
    """Summarise a command result as a short status string.

    Returns "SUCCESS" when *code* is 0. Otherwise returns "FAIL" followed by
    the non-empty *out* and *err* texts, each wrapped in square brackets.
    """
    if code == 0:
        return "SUCCESS"
    pieces = ["FAIL"]
    for text in (out, err):
        if len(text):
            pieces.append("[%s]" % text)
    return " ".join(pieces)

def runPreScript(hvip):
    """Run /tmp/pre-<txn_id>.sh on host *hvip*.

    ``txn_id`` is the module-level transaction id assigned by the
    restartControllers command. On a non-zero exit status the captured
    output is printed and sys.exit(1) is called.
    NOTE: when this runs inside a worker thread, sys.exit() only raises
    SystemExit in that thread.
    """
    print(getFormat("Running the pre-script on HV: %s" % hvip))
    script = '/tmp/pre-%s.sh' % txn_id
    out, err, ret = remoteExec([script], hvip)
    if not ret:
        return
    print(getFormat("Failed to run pre-script on HV: %s. Output: %s Error: %s" % (hvip, out, err)))
    sys.exit(1)

def runPostScript(hvip):
    """Run /tmp/post-<txn_id>.sh on host *hvip*.

    ``txn_id`` is the module-level transaction id assigned by the
    restartControllers command. On a non-zero exit status the captured
    output is printed and sys.exit(1) is called.
    NOTE: when this runs inside a worker thread, sys.exit() only raises
    SystemExit in that thread.
    """
    print(getFormat("Running the post-script on HV: %s" % hvip))
    script = '/tmp/post-%s.sh' % txn_id
    out, err, ret = remoteExec([script], hvip)
    if not ret:
        return
    print(getFormat("Failed to run post-script on HV: %s. Output: %s Error: %s" % (hvip, out, err)))
    sys.exit(1)

class runFnInthread(Thread):
    """Thread that calls ``fn(*args)`` and publishes the outcome on a queue.

    Exactly one item is put on *queue* per run: the return value of *fn*, or
    None when *fn* raised (this includes SystemExit raised by sys.exit()).
    Always publishing an item keeps a consumer that blocks on ``queue.get()``
    from waiting forever after a failed call.
    """
    def __init__(self, queue, fn, *args):
        Thread.__init__(self)
        self.queue = queue
        self.fn = fn
        self.args = args

    def run(self):
        """Invoke the wrapped callable and put its result on the queue."""
        result = None
        try:
            result = self.fn(*self.args)
        finally:
            # Runs on success and on any exception; the exception (if any)
            # still propagates afterwards and ends the thread as before.
            self.queue.put(result)

def waitForThreadsToFinish(threads, queues):
    """Wait for every thread to finish and collect one result per thread.

    *threads* and *queues* are parallel lists; ``queues[i]`` receives the
    result produced by ``threads[i]``. Threads are waited for in list order
    and the results are returned in that same order. A thread that finished
    without putting anything on its queue contributes None instead of
    blocking the caller forever.

    As before, both input lists are consumed: processed entries are removed,
    so the lists are empty on return when they have equal length.
    """
    results = []
    while threads and queues:
        worker, outbox = threads[0], queues[0]
        # Poll rather than join() so the main thread stays responsive to
        # signals while waiting.
        while worker.is_alive():
            time.sleep(0.1)
        # The producer is finished, so empty() is reliable here.
        if outbox.empty():
            results.append(None)
        else:
            results.append(outbox.get())
        del threads[0]
        del queues[0]
    return results

class monitorPidInThreadAndKillOnTimeout(Thread):
    """Watchdog thread that terminates a process once a timeout expires.

    The process *pid* is polled once per second. The thread ends as soon as
    the process is gone. If the process is still present after *timeout*
    seconds (converted with int()), SIGTERM is sent to it.
    """
    def __init__(self, pid, timeout):
        Thread.__init__(self)
        self.pid = pid
        self.timeout = int(timeout)

    def run(self):
        """Poll the process and send SIGTERM when the timeout is reached."""
        timeelapsed = 0
        while timeelapsed < self.timeout:
            if not testPID(self.pid):
                return
            # Progress message every 10 seconds (but not at second 0).
            if timeelapsed > 0 and timeelapsed % 10 == 0:
                print(getFormat("Pid %d still active, time elapsed %s, will timeout at %d seconds." % (int(self.pid), int(timeelapsed), self.timeout)))
            time.sleep(1)
            timeelapsed += 1

        # Re-check liveness so the message is only printed when it is true.
        if timeelapsed == self.timeout and testPID(self.pid):
            print(getFormat("Pid %d still active, time elapsed %s, timing it out." % (int(self.pid), int(timeelapsed))))
            try:
                os.kill(self.pid,SIGTERM)
            except OSError:
                # The process exited between the liveness check and the kill;
                # there is nothing left to terminate.
                pass

# A sub-command is mandatory.
if len(sys.argv)<2:
    usage()

if sys.argv[1] == "listHVs":
    # listHVs [-nc]: for every reachable HV, copy the matching liveupdate
    # archive over, extract its package version file and report whether the
    # toolstack / controllers differ from the packaged version.
    IPs = getHVIPs()
    notCheckHost = False
    if (len(sys.argv) == 3) and (sys.argv[2] == "-nc"):
        notCheckHost = True
    for ip in IPs:
        if not _isAlive(ip, notCheckHost):
            continue
        version = 5
        if _isCentos6(ip):
            version = 6
        if _isCentos7(ip):
            version = 7

        # Reset per host so values from a previous iteration are never reused.
        src = None
        xen_version = None
        platform = "XEN"
        if _isKVMHV(ip):
            platform = "KVM"
            # Same archive selection as updateToolstack: CentOS 7 KVM hosts
            # have their own image directory.
            if version == 7:
                src = "/tftpboot/images/centos7/ramdisk-kvm/liveupdate.tgz"
            else:
                src = "/tftpboot/images/centos6/ramdisk-kvm/liveupdate.tgz"
        else:
            xen_version = findXenVersion(ip)
            if xen_version == 3:
                src = "/tftpboot/images/centos5/ramdisk-xen/liveupdate.tgz"
            elif xen_version == 4:
                src = "/tftpboot/images/centos6/ramdisk-xen/liveupdate.tgz"

        if src is None:
            # Neither KVM nor a supported Xen version was detected.
            print("Node: %s\tHV type: %s\tUnable to determine a liveupdate archive (Xen version: %s), skipping" % (ip, platform, xen_version))
            continue

        dst = "/."
        if not os.path.exists(src):
            print("Failed to find liveupdate archive. Please update cloudboot RPM on CP server.")
            sys.exit(1)
        filetransfer(src,dst,ip)

        # untar just the package version file
        cmd = ['cd', '/',';','tar' ,'-xzf', '/liveupdate.tgz', './onappstore/package-version.txt']
        out,err,ret = remoteExec(cmd,ip)

        toolstack_upgradable = _needsToolstackRestart(ip)
        controller_upgradable = _needsControllerRestart(ip)
        if platform == "XEN":
            print("Node: %s\tCentOS Version: %d\tHV type: %s\tToolstack-upgradable: %s\tController-upgradable: %s\tXen Version: %d" % (ip, version, platform, toolstack_upgradable, controller_upgradable, xen_version))
        else:
            print("Node: %s\tCentOS Version: %d\tHV type: %s\tToolstack-upgradable: %s\tController-upgradable: %s" % (ip, version, platform, toolstack_upgradable, controller_upgradable))
    sys.exit(0)

# Every remaining sub-command needs a target HV IP address.
if len(sys.argv)<3:
    usage()

if sys.argv[1] == "updateToolstack":
    # updateToolstack <HV IP> [force]: push the liveupdate archive to the HV,
    # unpack it and restart the storage services when the packaged version
    # differs from the running one (or unconditionally with "force").
    ip = sys.argv[2]
    if not _isAlive(ip):
        print("Unable to contact host %s" % ip)
        sys.exit(1)
    version = 5
    if _isCentos6(ip):
        version = 6
    if _isCentos7(ip):
        version = 7
    # src stays None when no archive is known for this platform/version.
    src = None
    if _isXenHV(ip):
        platform = "XEN"
        xen_version = findXenVersion(ip)
        if xen_version == 3:
            src = "/tftpboot/images/centos5/ramdisk-xen/liveupdate.tgz"
        elif xen_version == 4:
            src = "/tftpboot/images/centos6/ramdisk-xen/liveupdate.tgz"
    else:
        platform = "KVM"
        if version == 6:
            src = "/tftpboot/images/centos6/ramdisk-kvm/liveupdate.tgz"
        elif version == 7:
            src = "/tftpboot/images/centos7/ramdisk-kvm/liveupdate.tgz"
    dst = "/."

    if src is None or not os.path.exists(src):
        print("Failed to find liveupdate archive. Please update cloudboot RPM on CP server.")
        sys.exit(1)
    filetransfer(src,dst,ip)
    print("STEP1 - Copied liveupdate archive onto HV")

    # untar just the package version file
    cmd = ['cd', '/',';','tar' ,'-xzf', '/liveupdate.tgz', './onappstore/package-version.txt']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP2 - Untarring toolstack version file from liveupdate tarball: %s" % (parse_output(out,err,ret)))

    #Now check whether version has changed
    forcerestart = False
    if len(sys.argv) > 3:
        if sys.argv[3] == 'force':
            forcerestart = True

    restart = _needsToolstackRestart(ip)
    print("STEP3 - Check whether toolstack version has changed: %s" % str(restart))
    if not restart and not forcerestart:
        print("Exiting, no more work to do.")
        sys.exit(0)

    #Stop crond before unpacking tarball
    cmd = ['/etc/init.d/crond', 'stop']
    out,err,ret = remoteExec(cmd,ip)

    cmd = ['tar','-zx', '--touch', '--no-overwrite-dir','-C','/', '-f', '/liveupdate.tgz']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP4 - Untarring liveupdate archive: %s" % (parse_output(out,err,ret)))

    #Remove the tar archive
    cmd = ['rm','/liveupdate.tgz']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP5 - Removing liveupdate archive: %s" % (parse_output(out,err,ret)))

    if version == 7:
        cmd = ['/bin/cp', '-a', '/usr/pythoncontroller/storageAPI.service', '/usr/lib/systemd/system/storageAPI.service']
        out,err,ret = remoteExec(cmd,ip)
        print("STEP5.1 - Updating storageAPI.service unit file: %s" % (parse_output(out,err,ret)))
        cmd = ['systemctl', 'daemon-reload']
        out,err,ret = remoteExec(cmd,ip)
        print("STEP5.2 - Reloading systemd manager configuration: %s" % (parse_output(out,err,ret)))

    #Now restart groupmon and storageAPI daemons
    cmd = ['service', 'groupmon', 'stop']
    out,err,ret = remoteExec(cmd,ip)
    # Report after the command has run, so the status shown belongs to it.
    print("STEP6 - Stopping groupmon: %s" % (parse_output(out,err,ret)))
    time.sleep(2)
    # Two forced kills, two seconds apart, for any groupmon still running.
    cmd = ['killall','-9', 'groupmon']
    out,err,ret = remoteExec(cmd,ip)
    time.sleep(2)
    cmd = ['killall','-9', 'groupmon']
    out,err,ret = remoteExec(cmd,ip)
    time.sleep(2)

    cmd = ['service','storageAPI','stop']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP7 - Stopping storageAPI: %s" % (parse_output(out,err,ret)))

    cmd = ['killall', '-9', 'redis-server']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP8 - Stopping redis-server: %s" % (parse_output(out,err,ret)))

    cmd = ['rm','-f','/DB/objects.db']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP9 - Resetting redis DB: %s" % (parse_output(out,err,ret)))

    # The globs are expanded by the remote shell. The status printed below
    # reflects the last removal only.
    for pattern in ('/tmp/*lock', '/tmp/DS-*', '/tmp/NODE-*', '/tmp/VDISK-*'):
        cmd = ['rm','-f',pattern]
        out,err,ret = remoteExec(cmd,ip)
    print("STEP10 - Cleaning stale locks: %s" % (parse_output(out,err,ret)))

    print("STEP - Restarting SNMP services")
    cmd = ['/scripts/fix_ip_of_cp_hv.sh']
    out,err,ret = remoteExec(cmd,ip)
    cmd = ['service','snmptrapd','stop']
    out,err,ret = remoteExec(cmd,ip)
    cmd = ['service','snmpd','stop']
    out,err,ret = remoteExec(cmd,ip)
    cmd = ['service','snmpd','start']
    out,err,ret = remoteExec(cmd,ip)
    cmd = ['service','snmptrapd','start']
    out,err,ret = remoteExec(cmd,ip)

    cmd = ['service', 'groupmon', 'start']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP11 - Restarting groupmon: %s" % (parse_output(out,err,ret)))

    cmd = ['service', 'crond', 'start']
    out,err,ret = remoteExec(cmd,ip)

    cmd = ['rm','-f','/onappstore/toolstackversion.txt']
    out,err,ret = remoteExec(cmd,ip)
    cmd = ['cp','/onappstore/package-version.txt','/onappstore/toolstackversion.txt']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP12 - Recording runtime toolstack version: %s" % (parse_output(out,err,ret)))

    cmd = ['/usr/pythoncontroller/convertVMConfigtoHotplug']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP13 - Convert storage controllers to enable hotplug")

    time.sleep(2)

    print("STEP14 - wait for 3 minutes while Nodes DB gets re-populated")
    # 90 ticks of 2 seconds each; a progress line every 30 ticks (1 minute).
    minute = 1
    for i in range(1,91):
        sys.stdout.write(".")
        sys.stdout.flush()
        time.sleep(2)
        if i%30 == 0:
            print(" Minute %d complete" % minute)
            minute+=1

    # now that everything is up, start the storage API
    cmd = ['service','storageAPI','start']
    out,err,ret = remoteExec(cmd,ip)
    print("Final step - Starting storageAPI: %s" % (parse_output(out,err,ret)))

    sys.exit(0)

elif sys.argv[1] == "restartControllers":
    # restartControllers <HV IP> [force] [timeout_in_secs] [Controller ID]:
    # prepare every HV, run the pre-scripts, restart the storage
    # controller(s) on the target HV under a timeout watchdog, and always run
    # the post-scripts afterwards.
    ip = sys.argv[2]
    if not _isAlive(ip):
        print(getFormat("Unable to contact host %s" % ip))
        sys.exit(1)
    if _isXenHV(ip):
        platform = "XEN"
    else:
        platform = "KVM"

    # Only the literal "force" skips the version check. Any other value in
    # that position falls through to the check instead of silently exiting.
    forcerestart = len(sys.argv) > 3 and sys.argv[3] == 'force'
    if forcerestart:
        restart = True
    else:
        #Now check whether version has changed
        restart = _needsControllerRestart(ip)
        print(getFormat("Check whether controller version has changed: %s" % str(restart)))

    if not restart:
        print("Exiting, no more work to do.")
        sys.exit(0)

    timeout = 120
    if len(sys.argv) > 4:
        timeout = int(sys.argv[4])

    nodeid = None
    if len(sys.argv) > 5:
        nodeid = int(sys.argv[5])

    print(getFormat("Starting storage controllers restart process - PLEASE DO NOT INTERRUPT!!"))
    print(getFormat("1. Preparing HVs for the operation"))
    # txn_id is a module-level name read by runPreScript()/runPostScript().
    txn_id = time.strftime('%d-%m-%Y_%H-%M-%S', time.gmtime())
    host_id = getHostid(ip)
    IPs = getHVIPs()
    for hvip in IPs:
        print(getFormat("Preparing HV: %s" % hvip))
        cmd = ['scp', '/usr/local/bin/prepareUpgradeScripts.sh', 'root@%s:/tmp/.' % hvip]
        out,err,ret = localExec(cmd)
        if ret:
            print(getFormat("Failed to prepare HV: %s. Output: %s Error: %s" % (hvip, out, err)))
            sys.exit(1)
        cmd = ['chmod 755 /tmp/prepareUpgradeScripts.sh']
        out,err,ret = remoteExec(cmd, hvip)
        if ret:
            print(getFormat("Failed to set permissions for prepare script on HV: %s. Output: %s Error: %s" % (hvip, out, err)))
            sys.exit(1)
        if nodeid is not None:
            cmd = ['/tmp/prepareUpgradeScripts.sh', txn_id, str(host_id), str(nodeid + 1)]
        else:
            cmd = ['/tmp/prepareUpgradeScripts.sh', txn_id, str(host_id)]
        out,err,ret = remoteExec(cmd, hvip)
        if ret:
            print(getFormat("Failed to prepare HV: %s. Output: %s Error: %s" % (hvip, out, err)))
            sys.exit(1)

    try:
        print(getFormat("2. Running the pre-script on the HVs"))
        threadlist = []
        queuelist = []
        for hvip in IPs:
            # run the pre-script on each hv in parallel
            q = Queue()
            t = runFnInthread(q, runPreScript, hvip)
            t.start()
            threadlist.append(t)
            queuelist.append(q)

        # wait for the threads to finish
        waitForThreadsToFinish(threadlist, queuelist)

        # run the controller restart script on the hv in question
        print(getFormat("3. Restarting the Storagecontrollers"))

        if nodeid is not None:
            cmd = ['ssh', ip, '/usr/pythoncontroller/diskhotplug', 'restartController' , str(nodeid)]
        else:
            cmd = ['ssh', ip, '/usr/pythoncontroller/controllerRestart']
        restartctrlprocess = Popen(cmd, shell=False,stdout=PIPE, stderr=PIPE, close_fds = False)
        pid = restartctrlprocess.pid
        # The watchdog sends SIGTERM to the local ssh process on timeout.
        t = monitorPidInThreadAndKillOnTimeout(pid, timeout)
        t.start()
        out,err = restartctrlprocess.communicate()
        ret = restartctrlprocess.returncode
        if ret == 0:
            print(getFormat("Restarted Storagecontrollers: %s" % (parse_output(out,err,ret))))
        else:
            print(getFormat("Restarting Storagecontrollers failed with: %s" % (parse_output(out,err,ret))))
    finally:
        print(getFormat("4. Running the post-script on the HVs"))
        # run the post-script on each hv in parallel
        threadlist = []
        queuelist = []
        for hvip in IPs:
            q = Queue()
            t = runFnInthread(q, runPostScript, hvip)
            t.start()
            threadlist.append(t)
            queuelist.append(q)

        # wait for the threads to finish
        waitForThreadsToFinish(threadlist, queuelist)

        print(getFormat("5. Add SANController status attribute"))
        cmd = ['touch','/var/lock/subsys/SANController']
        out,err,ret = remoteExec(cmd,ip)

    print(getFormat("Storage controllers restart complete!!"))

elif sys.argv[1] == "refreshControllers":
    # refreshControllers <HV IP>: push the storagenode archive to the HV and
    # run the installed refresh script against it.
    ip = sys.argv[2]
    if not _isAlive(ip):
        print("Unable to contact host %s" % ip)
        sys.exit(1)
    # src stays None when the Xen version is neither 3 nor 4.
    src = None
    if _isXenHV(ip):
        platform = "XEN"
        xen_version = findXenVersion(ip)
        if xen_version == 3:
            src = "/tftpboot/images/centos5/ramdisk-xen/liveupdate-storagenode.tgz"
        elif xen_version == 4:
            src = "/tftpboot/images/centos6/ramdisk-xen/liveupdate-storagenode.tgz"
    else:
        platform = "KVM"
        src = "/tftpboot/images/centos6/ramdisk-kvm/liveupdate-storagenode.tgz"
    dst = "/."

    print("STEP1 - Copying liveupdate storagenode archive onto HV")
    if src is None or not os.path.exists(src):
        print("Failed to find liveupdate storagenode archive. Please update cloudboot RPM on CP server.")
        sys.exit(1)
    filetransfer(src,dst,ip)
    print("      - Copied liveupdate storagenode archive onto HV")

    # now refresh the controllers using the installed script
    print("STEP2 - Refreshing local controllers on the HV: %s" % ip)
    cmd = ['/usr/pythoncontroller/refreshcontrollers', '/liveupdate-storagenode.tgz']
    out,err,ret = remoteExec(cmd,ip)
    print("      - Refreshed local controllers on the HV: %s" % (parse_output(out,err,ret)))

    #Remove the tar archive
    print("STEP3 - Removing liveupdate storagenode archive: %s" % ip)
    cmd = ['rm','/liveupdate-storagenode.tgz']
    out,err,ret = remoteExec(cmd,ip)
    print("      - Removed liveupdate storagenode archive: %s" % (parse_output(out,err,ret)))
    sys.exit(0)

elif sys.argv[1] == "liveRestartBackends":
    # liveRestartBackends <HV IP>: restart the backends of every vdisk that
    # has a bdevclient process on the HV.
    ip = sys.argv[2]
    if not _isAlive(ip):
        print("Unable to contact host %s" % ip)
        sys.exit(1)

    # now live restart the backends for this HV
    # first list active vdisks on this HV
    cmd = ['ps', 'ax', '|', 'grep', 'bdevclient' ,'|', 'grep', '-v', '\"grep bdevclient\"', '|', 'awk', '\'{print $12}\'', '|', 'sort', '|', 'uniq']
    out,err,ret = remoteExec(cmd,ip)

    # Drop blank entries so that "no active vdisks" does not turn into one
    # restart call with an empty vdisk name.
    vdisks = [entry.strip() for entry in out.split('\n') if entry.strip()]
    print("Live restarting the backends on the HV: %s, active VDisks: %s" % (ip, ','.join(vdisks)))
    for vdisk in vdisks:
        cmd = ['/usr/bin/restartlivevdiskbackends', vdisk]
        print("      - Live restarting the backends for the VDisk: %s" % vdisk)
        out,err,ret = remoteExec(cmd,ip)
    print("Live restarted the backends on the HV: %s" % ip)
    sys.exit(0)

elif sys.argv[1] == "updateDrivers":
    # updateDrivers <HV IP>: reload the nbd and dm_mirror_sync kernel modules,
    # provided /dev/mapper lists at most one entry.
    ip = sys.argv[2]
    if not _isAlive(ip):
        print("Unable to contact host %s" % ip)
        sys.exit(1)
    if _isXenHV(ip):
        platform = "XEN"
    else:
        platform = "KVM"

    cmd = ['ls','-1','/dev/mapper/*']
    out,err,ret = remoteExec(cmd,ip)
    if len(out[:-1].split('\n'))>1:
        out = "Please migrate all active VMs off this HV before re-running the updateDrivers utility."
        ret = 1
    # out is left untouched otherwise, so a failed "ls" is reported with its
    # own output rather than as "FAIL [SUCCESS]".
    print("STEP1 - Checking there are no active devices on HV: %s" % parse_output(out,err,ret))
    if ret:
        sys.exit(1)

    cmd = ['modprobe','-r','nbd']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP2 - Unloading nbd module: %s" % (parse_output(out,err,ret)))

    cmd = ['modprobe','nbd','nbds_max=1024']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP3 - Reloading nbd module: %s" % (parse_output(out,err,ret)))

    cmd = ['modprobe','-r','dm_mirror_sync']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP4 - Unloading dm_mirror_sync module: %s" % (parse_output(out,err,ret)))

    cmd = ['modprobe','dm_mirror_sync']
    out,err,ret = remoteExec(cmd,ip)
    print("STEP5 - Reloading dm_mirror_sync module: %s" % (parse_output(out,err,ret)))
    sys.exit(0)

else:
    usage()

