A detailed synthetic example will illustrate the use of the hsar() function, including the generation of attribute data,spatial data and the extraction of the spatial weights matrices and the random effect design matrix.
In order to run the example, sp, maptools, rdgal and rgeos libraries will be loaded.
library(sp)
library(maptools)
library(rgdal)
library(rgeos)
Initialy we set up a grid data representing higher-level units, say neighbourhoods
grid_high <- GridTopology(cellcentre.offset=c(1,1),cellsize=c(1,1),cells.dim=c(10,10))
grid_sp <- SpatialGrid(grid_high)
then we create an attribute, say population, for the grid cells
population <- as.numeric(scale(rpois(100,10)))
and create a SpatialGridDataFrame
grid_data <- SpatialGridDataFrame(grid_sp,data.frame(population))
Now check the names of each grid cell and plot the four cells from the left-top corner. Also, let's create lower-level units, says 2000 individuals
par(mar=c(0,0,0,0))
plot(grid_data)
text(coordinates(grid_data),row.names(grid_data),col="red",cex=0.7)
#plot(grid_data[1:2,1:2,"population"],add=TRUE,col="blue",lwd=3)
individual_points <- spsample(grid_data,2000,"random")
row.names(individual_points) <- as.character(1:2000)
points(individual_points, pch=16,cex=0.7)
and create some attributes for individuals
att.data <- data.frame(happiness=as.numeric(scale(runif(2000,1,10))),
income=as.numeric(scale(runif(2000,1,10))),
age=as.numeric(scale(round(runif(2000,20,50)))))
row.names(att.data) <- as.character(1:2000)
individual_data <- SpatialPointsDataFrame(individual_points,att.data)
Now we extract the random effect design matrix and two spatial weights matrices to use the hsar() function.
So we start by finding out the neighbouhood where each individual is located based on the spatial locations
link.index <- over(individual_points,grid_sp)
and by extracting the population attribute of the neighbourhood where an individual is located
link.data <- over(individual_data,grid_data)
link.data <- data.frame(neighbourhood_index=link.index,link.data)
individual_data@data <- data.frame(individual_data@data,link.data)
Next we order the data so that the neighbourhood index is ordered ascendingly
model.data.sp <- individual_data[order(individual_data$neighbourhood_index),]
and we can have a look at the first 10 observations
head(model.data.sp@data,10)
## happiness income age neighbourhood_index population
## 73 -1.27854059 1.12143674 -0.7821285 1 -0.1531477
## 246 0.63686907 0.60084393 0.8634679 1 -0.1531477
## 286 0.32325681 -1.55399433 -0.4295007 1 -0.1531477
## 363 0.64880912 0.09915235 0.7459253 1 -0.1531477
## 410 0.84995545 -0.68843848 0.7459253 1 -0.1531477
## 499 0.55434380 0.47976093 -1.0172137 1 -0.1531477
## 588 0.62552999 -1.29439879 -0.5470433 1 -0.1531477
## 795 -1.48467347 -1.27566449 0.9810105 1 -0.1531477
## 895 0.04221471 -1.70343526 -0.5470433 1 -0.1531477
## 897 1.38209510 -1.60026435 0.8634679 1 -0.1531477
Let's define the random effect matrix which captures the number of individuals within each neighbourhood
MM <- as.data.frame(table(model.data.sp$neighbourhood_index))
Find the total number of neighbourhood (100 in our case)
Utotal <- dim(MM)[1]
Unum <- MM[,2]
Uid <- rep(c(1:Utotal),Unum)
Define the random effect matrix (Delta) with a dimension of 2000x100
library(spdep)
n <- nrow(model.data.sp)
Delta <- matrix(0,nrow=n,ncol=Utotal)
for(i in 1:Utotal) {
Delta[Uid==i,i] <- 1
}
rm(i)
Delta <- as(Delta,"dgCMatrix")
Evaluate the neighbourhood-level (higher-level) spatial weights matrix based on sharing a common border (Rook's rule). Here we should note that if neighbourhoods are represented by spatial polygons, the extraction of the neighbour list and weights matrix is obtained through the poly2nb() and nb2listw() (or nb2mat()) functions in the R spdep package.
temp.coords <- coordinates(grid_sp)
distance <- array(0,c(Utotal,Utotal))
M <- array(0,c(Utotal,Utotal))
for(i in 1:Utotal) {
for(j in 1:Utotal){
temp <- (temp.coords[i,1] - temp.coords[j,1])^2 + (temp.coords[i,2] - temp.coords[j,2])^2
distance[i,j] <- sqrt(temp)
if(temp == 1) M[i,j] <- 1
}
}
Then row-normalize M
M <- M / rowSums(M)
M <- as(M,"dgCMatrix")
Then create the individual-level spatial weights matrix W. We simply assume each individual is interacting with other individuals located within a radius of 1.5 units. The intensity of interaction is measured by how far individuals are separated with a Gaussian distance decay kernel
library(RColorBrewer)
library(classInt)
So create the neighbour list
nb.15 <- dnearneigh(model.data.sp,0,1.5)
and the weights matrix W
dist.15 <- nbdists(nb.15,model.data.sp)
dist.15 <- lapply(dist.15,function(x) exp(-0.5 * (x / 1.5)^2))
mat.15 <- nb2mat(nb.15,glist=dist.15,style="W")
W <- as(mat.15,"dgCMatrix")
Now let's simulate our outcome variable i.e. health of each individual using arbitary values of model parameters.
rho <- 0.2
lambda <- 0.7
sigma2e <- 0.8
sigma2u <- 0.4
betas <- c(3,2,3,4,5)
In order to run the models we simulate neighbourhood-level random effect
thetas <- as.numeric(solve(diag(Utotal) - lambda*M) %*% rnorm(Utotal,mean=0,sd=sqrt(sigma2u)))
grid_data$thetas_true <- thetas
image(grid_data,"thetas_true")
and simulate an individual-level outcome variable — health
X.mat <- as.matrix(model.data.sp@data[,c("happiness","income","age","population")])
X.mat <- cbind(rep(1,n),X.mat)
health <- as.numeric(solve(diag(n) - rho*W) %*% (X.mat %*% betas + Delta %*% thetas + rnorm(n,0,sqrt(sigma2e))))
model.data.sp$health <- health
So we wun the hsar() function
library(HSAR)
res.formula <- health ~ happiness + income + age + population
res <- hsar(res.formula,data=model.data.sp@data,W=W,M=M,Delta=Delta,burnin=100, Nsim=200)
summary(res)
##
## Call:
## hsar(formula = res.formula, data = model.data.sp@data, W = W,
## M = M, Delta = Delta, burnin = 100, Nsim = 200)
## Type: hsar
##
## Coefficients:
## Mean SD
## (Intercept) 3.063157 0.11265352
## happiness 2.013664 0.02127280
## income 2.991656 0.01798086
## age 4.038902 0.02120255
## population 4.973759 0.07692483
##
## Spatial Coefficients:
## rho lambda
## [1,] 0.24018 0.72172
##
## Diagnostics
## Deviance information criterion (DIC): 5369.704
## Effective number of parameters (pd): 134.0597
## Log likelihood: -2550.792
## Pseudo R squared: 0.9859406
##
## Impacts:
## direct indirect total
## (Intercept) 3.064950 0.9657010 4.030651
## happiness 2.014842 0.6348344 2.649677
## income 2.993407 0.9431594 3.936566
## age 4.041265 1.2733176 5.314583
## population 4.976670 1.5680438 6.544714
##
## Quantiles:
## 5% 25% 50% 75% 95%
## (Intercept) 2.845421 3.007351 3.068628 3.138370 3.213482
## happiness 1.979406 1.996749 2.014244 2.026528 2.047218
## income 2.961977 2.978252 2.995017 3.002382 3.024893
## age 4.008299 4.021580 4.034428 4.055881 4.073513
## population 4.853948 4.920624 4.976291 5.021743 5.090672
and visualise the estimated neighbourhood effect
grid_data$theta.est <- thetas.est <- as.numeric(res$Mus)
image(grid_data,"theta.est",breaks=as.numeric(quantile(thetas.est)),col=brewer.pal(4,"Reds"))
Furher let's run another two simpler models Firstly, a model assuming rho = 0, that is, no interaction effects at the individual level
res_1 <- hsar(res.formula,data=model.data.sp@data,W=NULL,M=M,Delta=Delta,burnin=100, Nsim=200)
summary(res_1)
and secondly, a model assuming lambda = 0, that is, no interaction effects at the neighbourhood level
res_2 <- hsar(res.formula,data=model.data.sp@data,W=W,M=NULL,Delta=Delta,burnin=100, Nsim=200)
summary(res_2)