curie.ccc.cea.fr GPU

Information about the system can be found at http://www-hpc.cea.fr/en/complexe/tgcc-curie.htm.

Warning (May 14 2013): the rpa-gpu-expt branch fails to compile because cublasZdgmm is missing in cuda-4.2.

The system runs Bull Linux. The installation assumes the bash shell:

  • packages are installed under ~/CAMd:

    mkdir ~/CAMd
    cd ~/CAMd
    
  • module files are located under ~/CAMd/modulefiles:

    mkdir ~/CAMd/modulefiles
    
  • download the customize_curie_gpu.py file:

    # GPAW build customization for curie: icc, MKL/ScaLAPACK, bullxmpi and CUDA.
    # include_dirs, extra_objects and define_macros are extended in place, so
    # they must already be defined when this file is evaluated.
    import os

    compiler = 'icc'
    scalapack = True
    # hdf5 stays off, linking the system library fails with:
    # ld: /usr/local/phdf5-1.8.5/lib/libhdf5.a(H5.o): relocation R_X86_64_32 against `.rodata.str1.4' can not be used when making a shared object; recompile with -fPIC
    hdf5 = False

    # installation prefixes
    composer = '/usr/local/Intel_compilers/c/composer_xe_2011_sp1.7.256'
    mpi = '/opt/mpi/bullxmpi/1.1.16.5'
    mkl = os.path.join(composer, 'mkl', 'lib', 'intel64')
    intel = os.path.join(composer, 'compiler', 'lib', 'intel64')
    hdf = '/usr/local/phdf5-1.8.5'
    #
    # cublasZdgmm does not exist in cuda 4.2
    # /tmp/ipo_iccjq2M5h1.o: In function `cudgmm':
    # ipo_out1.c:(.text.hot0001d+0x522b): undefined reference to `cublasZdgmm'
    # strings /usr/local/cuda-4.2/lib64/libcublas.so | grep "cublasZdgmm"
    cuda = '/usr/local/cuda-4.2'  # comment out if no cuda

    mpi_libdir = os.path.join(mpi, 'lib')
    cuda_libdir = os.path.join(cuda, 'lib64')  # comment out if no cuda

    libraries = [
        'cublas',  # comment out if no cuda
        'cufft',  # comment out if no cuda
        'cuda',  # comment out if no cuda
        'cudart',  # comment out if no cuda
        #'mkl_def',
        'mkl_scalapack_lp64',
        'mkl_intel_lp64',
        'mkl_sequential',
        'mkl_core',
        'mkl_blacs_openmpi_lp64',
        #'hdf5',
        'mpi',
        ]
    library_dirs = [
        intel,
        mpi_libdir,
        mkl,
        cuda_libdir,  # comment out if no cuda
        #os.path.join(hdf, 'lib'),
        ]
    include_dirs += [
        os.path.join(mpi, 'include'),
        os.path.join(cuda, 'include'),  # comment out if no cuda
        #os.path.join(hdf, 'include'),
        ]
    # every directory below is baked into the binary as a run-time search
    # path; all of them go into one single '-Wl,...' linker argument
    rpath_dirs = [
        intel,
        mpi_libdir,
        cuda_libdir,  # comment out if no cuda
        mkl,
        #os.path.join(hdf, 'lib'),
        ]
    extra_link_args = ['-Wl,' + ','.join(['-rpath=' + d for d in rpath_dirs])]
    extra_compile_args = ['-xHOST', '-O3', '-ipo', '-std=c99', '-fPIC', '-Wall']
    # object file produced separately with nvcc from c/cukernels.cu
    extra_objects += ['./c/cukernels.o']
    define_macros += [
        ('GPAW_NO_UNDERSCORE_CBLACS', '1'),
        ('GPAW_NO_UNDERSCORE_CSCALAPACK', '1'),
        ]
    mpicompiler = os.path.join(mpi, 'bin', 'mpicc')
    mpilinker = mpicompiler
    
    
  • download packages with download_curie_gpu.sh:

    # Download every source tarball needed by the install script into ~/CAMd.
    # APPS must hold the resolved path, so readlink is run through command
    # substitution (a plain quoted string would store the command text itself).
    export APPS="$(readlink -f ~/CAMd)"
    export MODULEFILES="${APPS}/modulefiles"
    
    # warning - firewall blocks that, so download on other machine and scp!
    
    cd "${APPS}"
    # download packages
    nose_version=1.1.2
    wget http://pypi.python.org/packages/source/n/nose/nose-${nose_version}.tar.gz
    numpy_version=1.5.1
    wget http://downloads.sourceforge.net/numpy/numpy-${numpy_version}.tar.gz
    scipy_version=0.9.0
    wget https://downloads.sourceforge.net/project/scipy/scipy/${scipy_version}/scipy-${scipy_version}.tar.gz
    ase_version=3.7.0.3168
    wget https://wiki.fysik.dtu.dk/ase-files/python-ase-${ase_version}.tar.gz
    gpaw_version=0.9.0.8965
    wget https://wiki.fysik.dtu.dk/gpaw-files/gpaw-${gpaw_version}.tar.gz
    gpaw_setups_version=0.9.9672
    wget http://wiki.fysik.dtu.dk/gpaw-files/gpaw-setups-${gpaw_setups_version}.tar.gz
    # OK, curie does not allow svn!
    svn co https://svn.fysik.dtu.dk/projects/gpaw/branches/rpa-gpu-expt
    
  • install packages, deploy modules and test with install_curie_gpu.sh:

    # Installation prefix and the directory holding the private module files.
    APPS=$(readlink -f ~/CAMd)
    CAMD_MODULEFILES="${APPS}/modulefiles"
    export APPS CAMD_MODULEFILES
    
    # Ask python for its platform tag and version; both are used further down
    # to build the paths written into the module files.
    GPAW_PLATFORM=$(python -c "from distutils import util, sysconfig; print util.get_platform()+'-'+sysconfig.get_python_version()")
    PYTHONVERSION=$(python -c "from distutils import sysconfig; print sysconfig.get_python_version()")
    export GPAW_PLATFORM PYTHONVERSION
    
    # build packages
    
    # nose: unpack the tarball and install it under its own versioned root
    # directory ${APPS}/nose-<version>-1
    nose_version=1.1.2
    tar zxf nose-${nose_version}.tar.gz
    cd nose-${nose_version}
    python setup.py install --root=${APPS}/nose-${nose_version}-1
    cd ..
    
    # generate the nose module file; the here-document expands ${...} right
    # now, while the escaped \$apps_path is written out literally so that it
    # refers to the variable set inside the module file itself
    mkdir -p ${CAMD_MODULEFILES}/nose
    cat <<EOF > ${CAMD_MODULEFILES}/nose/${nose_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prepend-path    PATH                \$apps_path/nose-${nose_version}-1/usr/bin
    prepend-path    PYTHONPATH          \$apps_path/nose-${nose_version}-1/usr/lib/python${PYTHONVERSION}/site-packages/
    unset apps_path
    EOF
    
    # numpy: build with gfortran against the system ATLAS shared libraries
    numpy_version=1.5.1
    tar zxf numpy-${numpy_version}.tar.gz
    cd  numpy-${numpy_version}
    # atlas on curie is built without -fPIC
    # /usr/bin/ld: /usr/local/atlas-3.9.72/lib/libcblas.a(cblas_dgemm.o): relocation R_X86_64_32 against `.rodata.str1.8' can not be used when making a shared object; recompile with -fPIC
    # and atlas-devel is not installed!
    # so hack! - create the unversioned lib*.so names in the build directory,
    # each one pointing at the versioned library under /usr/lib64/atlas
    for versioned in libatlas.so.3.0 libcblas.so.3.0 libclapack.so.3 libf77blas.so.3 liblapack.so.3; do
        ln -s /usr/lib64/atlas/${versioned} ${versioned%%.so*}.so
    done
    # site.cfg points the build at those links and at the atlas headers
    {
        echo "[DEFAULT]"
        echo "library_dirs = $PWD"
        echo "include_dirs = /usr/local/atlas-3.9.72/include"
    } > site.cfg
    # avoid "Both g77 and gfortran runtimes linked in lapack_lite !" setting --fcompiler=gnu95
    # note that this forces /usr/bin/gfortran to be used
    python setup.py build --fcompiler=gnu95 2>&1 | tee build.log
    python setup.py install --root=${APPS}/numpy-${numpy_version}-1 2>&1 | tee install.log
    cd ..
    
    # module file for numpy (requires the nose module to be loaded first)
    mkdir -p ${CAMD_MODULEFILES}/numpy
    cat <<EOF > ${CAMD_MODULEFILES}/numpy/${numpy_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prereq nose
    prepend-path    PATH                \$apps_path/numpy-${numpy_version}-1/usr/bin
    prepend-path    PYTHONPATH          \$apps_path/numpy-${numpy_version}-1/usr/lib64/python${PYTHONVERSION}/site-packages/
    unset apps_path
    EOF
    
    # the atlas is missing on the hybrid (only?) nodes so hack again!
    # Keep private copies of the shared libraries in ./atlas (the job script
    # puts ${HOME}/CAMd/atlas on LD_LIBRARY_PATH).
    # The copy is done without changing directory: with "mkdir atlas && cd atlas"
    # a re-run (atlas already present) skipped the cd, copied the files to the
    # wrong place and the following "cd .." then left the installation directory.
    mkdir -p atlas
    cp -p /usr/lib64/atlas/liblapack.so.3 \
          /usr/lib64/atlas/libf77blas.so.3 \
          /usr/lib64/atlas/libcblas.so.3 \
          /usr/lib64/atlas/libatlas.so.3 \
          atlas/
    
    # make the module files generated so far visible and load what the scipy
    # build depends on
    module use $CAMD_MODULEFILES
    module load nose
    module load numpy  # scipy build needs numpy!
    
    # scipy: unpack, build with gfortran and install under its own versioned
    # root directory; build and install output is kept in build.log/install.log
    scipy_version=0.9.0
    tar zxf scipy-${scipy_version}.tar.gz
    cd  scipy-${scipy_version}
    # avoid g77 - leads to Segmentation faults
    # note that this forces /usr/bin/gfortran to be used
    python setup.py build --fcompiler=gnu95 2>&1 | tee build.log
    python setup.py install --root=${APPS}/scipy-${scipy_version}-1 2>&1 | tee install.log
    cd ..
    
    # generate the scipy module file
    # NOTE(review): the module file only declares "prereq nose" although the
    # build above needs numpy - confirm whether "prereq numpy" was intended
    mkdir -p ${CAMD_MODULEFILES}/scipy
    cat <<EOF > ${CAMD_MODULEFILES}/scipy/${scipy_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prereq nose
    prepend-path    PATH                \$apps_path/scipy-${scipy_version}-1/usr/bin
    prepend-path    PYTHONPATH          \$apps_path/scipy-${scipy_version}-1/usr/lib64/python${PYTHONVERSION}/site-packages/
    unset apps_path
    EOF
    
    # python-ase: only unpacked, no install step - the module file points
    # PATH and PYTHONPATH straight into the extracted source tree
    ase_version=3.7.0.3168
    tar zxf python-ase-${ase_version}.tar.gz
    
    mkdir -p ${CAMD_MODULEFILES}/python-ase
    cat <<EOF > ${CAMD_MODULEFILES}/python-ase/${ase_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prereq numpy
    prepend-path    PATH                \$apps_path/python-ase-${ase_version}/tools
    prepend-path    PYTHONPATH          \$apps_path/python-ase-${ase_version}/
    unset apps_path
    EOF
    
    # gpaw-setups: unpacked only; the module file adds the extracted directory
    # to GPAW_SETUP_PATH
    gpaw_setups_version=0.9.9672
    tar zxf gpaw-setups-${gpaw_setups_version}.tar.gz
    
    mkdir -p ${CAMD_MODULEFILES}/gpaw-setups
    cat <<EOF > ${CAMD_MODULEFILES}/gpaw-setups/${gpaw_setups_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prepend-path    GPAW_SETUP_PATH     \$apps_path/gpaw-setups-${gpaw_setups_version}
    unset apps_path
    EOF
    
    # gpaw: unpack now (the build itself happens further down); the module
    # file exposes the source tree plus the build/bin.<platform> and
    # build/lib.<platform> directories, using the GPAW_PLATFORM value
    # determined at the top of this script
    gpaw_version=0.9.0.8965
    tar zxf gpaw-${gpaw_version}.tar.gz
    
    mkdir -p ${CAMD_MODULEFILES}/gpaw
    cat <<EOF > ${CAMD_MODULEFILES}/gpaw/${gpaw_version}-1
    #%Module1.0
    set apps_path ${APPS}
    prereq python-ase
    prereq gpaw-setups
    prepend-path    PATH                \$apps_path/gpaw-${gpaw_version}/tools
    prepend-path    PATH                \$apps_path/gpaw-${gpaw_version}/build/bin.${GPAW_PLATFORM}
    prepend-path    PYTHONPATH          \$apps_path/gpaw-${gpaw_version}/
    prepend-path    PYTHONPATH          \$apps_path/gpaw-${gpaw_version}/build/lib.${GPAW_PLATFORM}
    setenv OMP_NUM_THREADS 1
    unset apps_path
    EOF
    
    # load the python stack in dependency order
    for camd_module in nose numpy scipy; do
        module load ${camd_module}
    done
    # test numpy and scipy
    for pkg in numpy scipy; do
        python -c "import ${pkg}; ${pkg}.test()"
    done
    
    # test ase - run inside its own directory and keep the output in a log file
    module load python-ase
    mkdir -p testase
    cd testase
    testase.py --no-display 2>&1 | tee testase.log
    cd ..
    
    # build gpaw
    cd gpaw-${gpaw_version}
    module load cuda
    # if on rpa-gpu-expt branch
    # rebuild c/cukernels.o from scratch: this is the object file that
    # customize_curie_gpu.py adds to the link through extra_objects
    rm -f c/cukernels.o
    cd c
    nvcc -arch sm_20 -c cukernels.cu -Xcompiler -fPIC
    cd ..
    # wget https://svn.fysik.dtu.dk/projects/gpaw/trunk/config.py  # fixed in trunk
    # build the extension with the site customization file located one
    # directory up; the complete output is kept in build_ext.log
    python setup.py build_ext --customize=../customize_curie_gpu.py --remove-default-flags 2>&1 | tee build_ext.log
    cd ..
    # finally load the modules that make the new build available
    module load gpaw-setups
    module load gpaw
    

    Note that whenever you install a new version of a package and deploy a new module file, it is better to keep the old module file as well.

  • submit a test job:

    ccc_msub msub_curie_gpu.sh
    

    using the following msub_curie_gpu.sh:

    #!/bin/bash
    #MSUB -n 8 # number of tasks
    #MSUB -T 4600 # time
    #MSUB -q hybrid # use hybrid for GPU
    #MSUB -A paxxxx
    
    # Test job: loads the privately installed modules and runs the gpaw test
    # suite in parallel. The first line must start with "#!" to be honoured as
    # the interpreter line (bash, as assumed throughout this guide).
    set -x
    cd ${BRIDGE_MSUB_PWD}
    module use ${HOME}/CAMd/modulefiles
    module load nose
    # blas/lapack/atlas missing
    # use the private copies of the atlas libraries stored under ~/CAMd/atlas
    export LD_LIBRARY_PATH=${HOME}/CAMd/atlas:${LD_LIBRARY_PATH}
    module load numpy
    module load scipy
    module load python-ase
    # NOTE(review): the install script in this guide only creates the
    # gpaw-setups/0.9.9672-1 module file; 0.8.7929-1 has to be deployed
    # separately, otherwise switch to the commented line below
    module load gpaw-setups/0.8.7929-1
    #module load gpaw-setups/0.9.9672-1
    module load gpaw
    #ccc_mprun gpaw-python gpaw-0.9.0.8965/gpaw/test/2Al.py
    ccc_mprun gpaw-python $(which gpaw-test)