VC-replication-3

From PRAGMA wiki
Jump to: navigation, search

An example

In this example,

  • the VC image was made on a VM hosting server at SDSC, fiji.rocksclusters.org, running Rocks 5.3 with XEN roll
  • the original cluster is rocks-184.sdsc.edu
    • 8 compute nodes
    • Software: Intel Fortran Compiler, Globus, SGE, MyProxy
    • Setup: see Base-line
    • Jane Ren (NBCR) installed AutoDock and some user-written scripts in the nbcruser home directory
    • Tested Avian Flu Grid application by Jane Ren
    • The image file was gzip'ed for download

Create a new cluster with AFG VC disk image

# rocks add cluster rocks-201.sdsc.edu 198.202.88.201 2
created frontend VM named: frontend-0-0-10
created compute VM named: hosted-vm-0-0-9
created compute VM named: hosted-vm-0-1-7
  • Get the MAC addresses and find the image file path for the new cluster.
# rocks list host vm rocks-201.sdsc.edu showdisks=y
SLICE MEM   CPUS MAC               HOST DISK                                                       DISKSIZE
10    1024  1    b6:58:ca:00:00:4e fiji file:/state/partition1/xen/disks/frontend-0-0-10.hda,hda,w 36      
----- ----- ---- b6:58:ca:00:00:4f ---- ---------------------------------------------------------- --------
[root@fiji ~]# rocks list host vm rocks-184.sdsc.edu showdisks=y
SLICE MEM   CPUS MAC               HOST DISK                                                      DISKSIZE
4     1024  1    b6:58:ca:00:00:1f fiji file:/state/partition1/xen/disks/frontend-0-0-4.hda,hda,w 36      
----- ----- ---- b6:58:ca:00:00:20 ---- --------------------------------------------------------- --------
  • Download the disk image file
  • Unzip it, then copy the disk image file to the new cluster's disk file location.
# gunzip afg-rocks-184.hda.gz
# cp afg-rocks-184.hda /state/partition1/xen/disks/frontend-0-0-10.hda

Modify the new disk image before boot

  • Mount the image file on the VM hosting server.
# lomount -diskimage /state/partition1/xen/disks/frontend-0-0-10.hda -partition 1 /media
  • Modify network interface config.
# cd /media/etc/sysconfig/network-scripts
# sudo vi ifcfg-eth0
DEVICE=eth0
HWADDR=b6:58:ca:00:00:4e        # get from "rocks list host vm hostname"
IPADDR=10.1.1.1
NETMASK=255.255.0.0
BOOTPROTO=static
ONBOOT=yes
MTU=1500
# sudo vi ifcfg-eth1
DEVICE=eth1
HWADDR=b6:58:ca:00:00:4f        # get from "rocks list host vm hostname"
IPADDR=198.202.88.201
NETMASK=255.255.224.0
BOOTPROTO=static
ONBOOT=yes
MTU=1500
  • Add ssh public key to root's authorized_keys.
# cd /media/root/.ssh
# vi authorized_keys
(add ssh public key)
  • Modify sshd config. (as appropriate)
# cd /media/etc/ssh/
# vi sshd_config
(PasswordAuthentication yes -> no)
  • Unmount.
# umount /media/
  • Start the new Afg VM
# rocks set host boot rocks-201.sdsc.edu action=os
# rocks list host boot rocks-201.sdsc.edu
ACTION
os    
# rocks start host vm rocks-201.sdsc.edu

Change hostname and network configurations

  • Login to Afg VM
# ssh root@rocks-201.sdsc.edu
  • Change hostname in mysql database.
# export PRIVATE_HOSTNAME=rocks-201
# cat << EOF > /tmp/hostname.sql
update nodes set name="$PRIVATE_HOSTNAME" where id=1;
EOF
# /opt/rocks/bin/mysql --user=apache cluster < /tmp/hostname.sql
# hostname rocks-201
  • Update attributes
[root@rocks-201 ~]# rocks list attr
ATTR                                   VALUE                                   
Kickstart_PublicHostname:              rocks-184.sdsc.edu                      
Info_ClusterName:                      Rocks-Cluster                           
Info_CertificateOrganization:          SDSC                                    
Info_CertificateLocality:              San Diego                               
Info_CertificateState:                 California                              
Info_CertificateCountry:               US                                      
Info_ClusterContact:                   admin@sdsc.edu                         
Info_ClusterURL:                       http://www.place.org/                   
Info_ClusterLatlong:                   N32.87 W117.22                          
Kickstart_PrivateHostname:             rocks-184                               
Kickstart_PrivateKickstartCGI:         sbin/kickstart.cgi                      
Kickstart_DistroDir:                   /export/rocks                           
Kickstart_PrivateKickstartBasedir:     install                                 
Kickstart_PublicKickstartHost:         central.rocksclusters.org               
Kickstart_PrivateDNSDomain:            local                                   
Kickstart_PublicDNSDomain:             sdsc.edu                                
Kickstart_Lang:                        en_US                                   
Kickstart_Langsupport:                 en_US                                   
Kickstart_Keyboard:                    us                                      
Kickstart_PrivateAddress:              10.1.1.1                                
Kickstart_PrivateNetmask:              255.255.0.0                             
Kickstart_PrivateNetwork:              10.1.0.0                                
Kickstart_PrivateBroadcast:            10.1.255.255                            
Kickstart_PrivateNetmaskCIDR:          16                                      
Kickstart_PrivateKickstartHost:        10.1.1.1                                
Kickstart_PrivateNTPHost:              10.1.1.1                                
Kickstart_PrivateGateway:              10.1.1.1                                
Kickstart_PrivateDNSServers:           10.1.1.1                                
Kickstart_PrivateSyslogHost:           10.1.1.1                                
Kickstart_Multicast:                   225.82.204.133                          
Kickstart_PublicAddress:               198.202.88.184                          
Kickstart_PublicNetmask:               255.255.255.0                           
Kickstart_PublicNetwork:               198.202.88.0                            
Kickstart_PublicBroadcast:             198.202.88.255                          
Kickstart_PublicNetmaskCIDR:           24                                      
Kickstart_PublicGateway:               198.202.88.20                           
Kickstart_PublicDNSServers:            198.202.75.26                           
Kickstart_PrivateRootPassword:         $1$rahNm9Uc$m1ifK.SNBt0sVghm1aNhc0      
Kickstart_PrivateSHARootPassword:      1f6ca82757a9a1f52a54a44dca2da0f5659385ff
Kickstart_PrivatePortableRootPassword: $P$BQVHJzIJ7vGSD7nBT4BItJu2m9cxTV0      
Kickstart_Timezone:                    America/Los_Angeles                     
Kickstart_PublicNTPHost:               pool.ntp.org                            
Server_Partitioning:                   force-default-root-disk-only            
rocks_version:                         5.3                                     
ssh_use_dns:                           true                                    
tripwire_mail:                         root@rocks-184.sdsc.edu                 
[root@rocks-201 ~]# rocks set attr Kickstart_PublicHostname rocks-201.sdsc.edu
[root@rocks-201 ~]# rocks set attr Kickstart_PrivateHostname rocks-201
[root@rocks-201 ~]# rocks set attr Kickstart_PublicAddress 198.202.88.201     
[root@rocks-201 ~]# rocks set attr tripwire_mail root@rocks-201.sdsc.edu 
[root@rocks-201 ~]# rocks list attr
ATTR                                   VALUE                                   
Kickstart_PublicHostname:              rocks-201.sdsc.edu                      
Info_ClusterName:                      Rocks-Cluster                           
Info_CertificateOrganization:          SDSC                                    
Info_CertificateLocality:              San Diego                               
Info_CertificateState:                 California                              
Info_CertificateCountry:               US                                      
Info_ClusterContact:                   admin@sdsc.edu                         
Info_ClusterURL:                       http://www.place.org/                   
Info_ClusterLatlong:                   N32.87 W117.22                          
Kickstart_PrivateHostname:             rocks-201                               
Kickstart_PrivateKickstartCGI:         sbin/kickstart.cgi                      
Kickstart_DistroDir:                   /export/rocks                           
Kickstart_PrivateKickstartBasedir:     install                                 
Kickstart_PublicKickstartHost:         central.rocksclusters.org               
Kickstart_PrivateDNSDomain:            local                                   
Kickstart_PublicDNSDomain:             sdsc.edu                                
Kickstart_Lang:                        en_US                                   
Kickstart_Langsupport:                 en_US                                   
Kickstart_Keyboard:                    us                                      
Kickstart_PrivateAddress:              10.1.1.1                                
Kickstart_PrivateNetmask:              255.255.0.0                             
Kickstart_PrivateNetwork:              10.1.0.0                                
Kickstart_PrivateBroadcast:            10.1.255.255                            
Kickstart_PrivateNetmaskCIDR:          16                                      
Kickstart_PrivateKickstartHost:        10.1.1.1                                
Kickstart_PrivateNTPHost:              10.1.1.1                                
Kickstart_PrivateGateway:              10.1.1.1                                
Kickstart_PrivateDNSServers:           10.1.1.1                                
Kickstart_PrivateSyslogHost:           10.1.1.1                                
Kickstart_Multicast:                   225.82.204.133                          
Kickstart_PublicAddress:               198.202.88.201                          
Kickstart_PublicNetmask:               255.255.255.0                           
Kickstart_PublicNetwork:               198.202.88.0                            
Kickstart_PublicBroadcast:             198.202.88.255                          
Kickstart_PublicNetmaskCIDR:           24                                      
Kickstart_PublicGateway:               198.202.88.20                           
Kickstart_PublicDNSServers:            198.202.75.26                           
Kickstart_PrivateRootPassword:         $1$rahNm9Uc$m1ifK.SNBt0sVghm1aNhc0      
Kickstart_PrivateSHARootPassword:      1f6ca82757a9a1f52a54a44dca2da0f5659385ff
Kickstart_PrivatePortableRootPassword: $P$BQVHJzIJ7vGSD7nBT4BItJu2m9cxTV0      
Kickstart_Timezone:                    America/Los_Angeles                     
Kickstart_PublicNTPHost:               pool.ntp.org                            
Server_Partitioning:                   force-default-root-disk-only            
rocks_version:                         5.3                                     
ssh_use_dns:                           true                                    
tripwire_mail:                         root@rocks-201.sdsc.edu
# rocks sync config
  • Remove nonexistent compute nodes.
# rocks list host
HOST         MEMBERSHIP CPUS RACK RANK RUNACTION INSTALLACTION
rocks-184:   Frontend   1    0    0    os        install      
compute-0-0: Compute    1    0    0    os        install      
compute-0-1: Compute    1    0    1    os        install      
compute-0-2: Compute    1    0    2    os        install      
compute-0-3: Compute    1    0    3    os        install      
compute-0-4: Compute    1    0    4    os        install      
compute-0-5: Compute    1    0    5    os        install      
compute-0-6: Compute    1    0    6    os        install      
compute-0-7: Compute    1    0    7    os        install      
# rocks remove host compute
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
root@rocks-184.sdsc.edu modified "@allhosts" in host group list
  • Set interface name.
# rocks list host interface
SUBNET  IFACE MAC               IP             NETMASK       MODULE NAME               VLAN
private eth0  b6:58:ca:00:00:1f 10.1.1.1       255.255.0.0   xennet rocks-184          ----
public  eth1  b6:58:ca:00:00:20 198.202.88.184 255.255.255.0 xennet rocks-184.sdsc.edu ----
# rocks set host interface name localhost eth1 rocks-201.sdsc.edu
# rocks set host interface name localhost eth0 rocks-201
# rocks list host interface
SUBNET  IFACE MAC               IP             NETMASK       MODULE NAME               VLAN
private eth0  b6:58:ca:00:00:1f 10.1.1.1       255.255.0.0   xennet rocks-201          ----
public  eth1  b6:58:ca:00:00:20 198.202.88.184 255.255.255.0 xennet rocks-201.sdsc.edu ----
  • Set MAC address.
# rocks set host interface mac localhost eth0 b6:58:ca:00:00:4e
# rocks set host interface mac localhost eth1 b6:58:ca:00:00:4f
# rocks list host interface
SUBNET  IFACE MAC               IP             NETMASK       MODULE NAME               VLAN
private eth0  b6:58:ca:00:00:4e 10.1.1.1       255.255.0.0   xennet rocks-201          ----
public  eth1  b6:58:ca:00:00:4f 198.202.88.184 255.255.255.0 xennet rocks-201.sdsc.edu ----
  • Set IP address
# rocks set host interface ip localhost eth1 198.202.88.201  
# hostname rocks-201.sdsc.edu
# rocks list host interface
SUBNET  IFACE MAC               IP             NETMASK       MODULE NAME               VLAN
private eth0  b6:58:ca:00:00:4e 10.1.1.1       255.255.0.0   xennet rocks-201          ----
public  eth1  b6:58:ca:00:00:4f 198.202.88.201 255.255.255.0 xennet rocks-201.sdsc.edu ----
  • Update network information.
# rocks remove route 198.202.88.184
# rocks add route 198.202.88.201 10.1.1.1 netmask=255.255.255.255  
# rocks report host interface localhost | rocks report script | sh
    # rocks report host route localhost > /etc/sysconfig/static-routes  (it is unclear whether this step is really necessary)
  • Edit system network configuration.
    vi /etc/sysconfig/network
    vi /etc/auto.home 
    vi /etc/sysconfig/iptables
    vi /etc/resolv.conf
  • Network restart and rocks sync.
    # service network restart
    # rocks sync dns
    # rocks sync config
error: commlib error: access denied (server host resolves destination host "rocks-201.local" as "(HOST_NOT_RESOLVABLE)")
ERROR: unable to send message to qmaster using port 536 on host "rocks-201.local": got send error
  • Reboot.
    # reboot
  • Rocks sync host.
    # rocks sync host network localhost

Rebuild Rocks Distribution

[root@rocks-201 ~]# cd /export/rocks/install
[root@rocks-201 install]# rocks create distro
Cleaning distribution
Resolving versions (base files)
    including "kernel" (5.3,x86_64) roll...
    including "sge" (5.3,x86_64) roll...
    including "hpc" (5.3,x86_64) roll...
    including "base" (5.3,x86_64) roll...
    including "web-server" (5.3,x86_64) roll...
    including "area51" (5.3,x86_64) roll...
    including "os" (5.3,x86_64) roll...
Including critical RPMS
Resolving versions (RPMs)
    including "kernel" (5.3,x86_64) roll...
    including "sge" (5.3,x86_64) roll...
    including "hpc" (5.3,x86_64) roll...
    including "base" (5.3,x86_64) roll...
    including "web-server" (5.3,x86_64) roll...
    including "area51" (5.3,x86_64) roll...
    including "os" (5.3,x86_64) roll...
Resolving versions (SRPMs)
    including "kernel" (5.3,x86_64) roll...
    including "sge" (5.3,x86_64) roll...
    including "hpc" (5.3,x86_64) roll...
    including "base" (5.3,x86_64) roll...
    including "web-server" (5.3,x86_64) roll...
    including "area51" (5.3,x86_64) roll...
    including "os" (5.3,x86_64) roll...
Creating files (symbolic links - fast)
Applying stage2.img
Applying updates.img
Installing XML Kickstart profiles
    installing "hpc" profiles...
    installing "web-server" profiles...
    installing "base" profiles...
    installing "sge" profiles...
    installing "area51" profiles...
    installing "kernel" profiles...
    installing "os" profiles...
    installing "site" profiles...
Creating repository
making "torrent" files for RPMS
  • Add compute nodes to rocks-201
    • on rocks-201.sdsc.edu, run "insert-ethers", choose "Compute"
    • on fiji, find the compute nodes for rocks-201, then start the compute nodes
# rocks list cluster rocks-201.sdsc.edu
FRONTEND            CLIENT NODES    TYPE
rocks-201.sdsc.edu: --------------- VM  
:                   hosted-vm-0-0-9 VM  
:                   hosted-vm-0-1-7 VM 
# rocks start host vm hosted-vm-0-0-9
# rocks start host vm hosted-vm-0-1-7
  • Copy rocks-201.sdsc.edu host certificate files to /etc/grid-security directory

Fix SGE configurations on rocks-201

  • Fix SGE submit host list
# qconf -ss
rocks-184.sdsc.edu
# qconf -as rocks-201.sdsc.edu
rocks-201.sdsc.edu added to submit host list
# qconf -ss
rocks-184.sdsc.edu
rocks-201.sdsc.edu
# qconf -ds rocks-184.sdsc.edu
root@rocks-201.sdsc.edu removed "rocks-184.sdsc.edu" from submit host list
# qconf -ss
rocks-201.sdsc.edu
  • Fix SGE administrative host list
# qconf -sh
compute-0-0.local
compute-0-1.local
compute-0-2.local
compute-0-3.local
compute-0-4.local
compute-0-5.local
compute-0-6.local
compute-0-7.local
rocks-184.local
rocks-201.local
# qconf -dh rocks-184.local
root@rocks-201.sdsc.edu removed "rocks-184.local" from administrative host list
# qconf -dh compute-0-2.local
root@rocks-201.sdsc.edu removed "compute-0-2.local" from administrative host list
# qconf -dh compute-0-3.local
root@rocks-201.sdsc.edu removed "compute-0-3.local" from administrative host list
# qconf -dh compute-0-4.local
root@rocks-201.sdsc.edu removed "compute-0-4.local" from administrative host list
# qconf -dh compute-0-5.local
root@rocks-201.sdsc.edu removed "compute-0-5.local" from administrative host list
# qconf -dh compute-0-6.local
root@rocks-201.sdsc.edu removed "compute-0-6.local" from administrative host list
# qconf -dh compute-0-7.local
root@rocks-201.sdsc.edu removed "compute-0-7.local" from administrative host list
# qconf -sh
compute-0-0.local
compute-0-1.local
rocks-201.local
  • Fix SGE configuration list
# qconf -sconfl
compute-0-0.local
compute-0-1.local
compute-0-2.local
compute-0-3.local
compute-0-4.local
compute-0-5.local
compute-0-6.local
compute-0-7.local
rocks-184.sdsc.edu
# qconf -dconf rocks-184.sdsc.edu
# qconf -aconf rocks-201.sdsc.edu
(Add 2 lines
mailer  /bin/mail
xterm   /usr/bin/X11/xterm)
# qconf -dconf compute-0-2.local
# qconf -dconf compute-0-3.local
# qconf -dconf compute-0-4.local
# qconf -dconf compute-0-5.local
# qconf -dconf compute-0-6.local
# qconf -dconf compute-0-7.local
# qconf -sconfl
compute-0-0.local
compute-0-1.local
rocks-201.sdsc.edu
  • Fix compute slots in queue configuration
# qconf -mq all.q
                      [compute-0-2.local=1],[compute-0-3.local=1], \
                      [compute-0-5.local=1],[compute-0-6.local=1], \
                      [compute-0-7.local=1],[compute-0-4.local=1]
qname                 all.q
hostlist              @allhosts
seq_no                0
load_thresholds       np_load_avg=1.75
suspend_thresholds    NONE
nsuspend              1
suspend_interval      00:05:00
priority              0
min_cpu_interval      00:05:00
processors            UNDEFINED
qtype                 BATCH INTERACTIVE
ckpt_list             NONE
pe_list               make mpich mpi orte
rerun                 FALSE
slots                 1,[compute-0-0.local=1],[compute-0-1.local=1]
tmpdir                /tmp
shell                 /bin/csh
prolog                NONE
epilog                NONE
shell_start_mode      posix_compliant
starter_method        NONE
suspend_method        NONE
resume_method         NONE
terminate_method      NONE
notify                00:00:60
owner_list            NONE
user_lists            NONE
xuser_lists           NONE
subordinate_list      NONE
complex_values        NONE
projects              NONE
xprojects             NONE
calendar              NONE
initial_state         default
s_rt                  INFINITY
h_rt                  INFINITY
s_cpu                 INFINITY
h_cpu                 INFINITY
s_fsize               INFINITY
h_fsize               INFINITY
s_data                INFINITY
h_data                INFINITY
s_stack               INFINITY
h_stack               INFINITY
s_core                INFINITY
h_core                INFINITY
s_rss                 INFINITY
h_rss                 INFINITY
s_vmem                INFINITY
h_vmem                INFINITY
  • Reboot frontend and compute nodes.

Modify globus_gram_fs_map_config.xml

  • Change the hostname from rocks-184.sdsc.edu to rocks-201.sdsc.edu.
# cd $GLOBUS_LOCATION/etc/gram-service
# vi globus_gram_fs_map_config.xml
(change all rocks-184.sdsc.edu to rocks-201.sdsc.edu)

Testing

See Test_Essential_Services