Using mgwrsar package

The MGWRSAR function requires a dataframe and a matrix of coordinates, and eventually a spatial weight matrix if model include spatiale dependence. The package includes a simulated data example which is used for this vignette.

library(mgwrsar)
#> Loading required package: Rcpp
#> Loading required package: sp
#> Loading required package: leaflet
#> Loading required package: Matrix
## loading data example
data(data_mgwrsar)
coord=as.matrix(mydata[,c("x_lat","y_lon")])
## Creating a spatial weigth matrix (sparce dgCMatrix) of 8 nearest neighbors
W=KNN(coord,8)

Estimation

Estimation of a GWR with a gauss kernel with and without parallel computation:

### with parallel computing
#ptm1<-proc.time()
#model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, #fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=TRUE,doMC=TRUE,ncore=4))
#(proc.time()-ptm1)[3]

### without parallel computing
ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=TRUE,doMC=FALSE))
(proc.time()-ptm1)[3]
#> elapsed 
#>   1.004

Summary and plot of mgwrsar object using leaflet:

summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR", 
#>     control = list(SE = TRUE, doMC = FALSE))
#> Model: GWR 
#> Kernels function: gauss 
#> Kernels type: GD 
#> bandwidth: 0.13 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept       X1       X2      X3
#> Min.     -2.63740  0.16880 -0.37306 -1.7588
#> 1st Qu.  -1.46100  0.38590  1.52729 -1.3070
#> Median   -1.08812  0.51409  1.94806 -1.0194
#> Mean     -0.97597  0.48879  1.94778 -1.0289
#> 3rd Qu.  -0.62307  0.59454  2.58871 -0.7439
#> Max.      1.06100  0.79570  3.37755 -0.3083
#> Effective degrees of freedom: 955.0674 
#> AIC 135.0056 
#> Residual sum of squares: 64.0684
plot_mgwrsar(model_GWR,type='B_coef',var='X2')

plot_mgwrsar(model_GWR,type='t_coef',var='X2')

Estimation of a GWR with spgwr package:

library(spgwr)
#> Loading required package: spData
#> To access larger datasets in this package, install the spDataLarge
#> package with: `install.packages('spDataLarge')`
#> NOTE: This package does not constitute approval of GWR
#> as a method of spatial analysis; see example(gwr)
mydataSP=mydata
coordinates(mydataSP)=c('x_lat','y_lon')
ptm1<-proc.time()
model_spgwr <- gwr(Y_gwr~X1+X2+X3, data=mydataSP, bandwidth=0.13,hatmatrix=TRUE)
(proc.time()-ptm1)[3]
#> elapsed 
#>  15.977
head(model_spgwr$SDF@data$gwr.e)
#> [1] -0.1585824 -0.1861144 -0.4450174 -0.1767796 -0.2014330  0.2670349
model_spgwr
#> Call:
#> gwr(formula = Y_gwr ~ X1 + X2 + X3, data = mydataSP, bandwidth = 0.13, 
#>     hatmatrix = TRUE)
#> Kernel function: gwr.Gauss 
#> Fixed bandwidth: 0.13 
#> Summary of GWR coefficient estimates at data points:
#>                  Min.  1st Qu.   Median  3rd Qu.     Max.  Global
#> X.Intercept. -2.63740 -1.46100 -1.08812 -0.62307  1.06100 -1.4845
#> X1            0.16880  0.38590  0.51409  0.59454  0.79570  0.5000
#> X2           -0.37306  1.52729  1.94806  2.58871  3.37755  2.5481
#> X3           -1.75876 -1.30704 -1.01941 -0.74386 -0.30828 -1.0762
#> Number of data points: 1000 
#> Effective number of parameters (residual: 2traceS - traceS'S): 62.85371 
#> Effective degrees of freedom (residual: 2traceS - traceS'S): 937.1463 
#> Sigma (residual: 2traceS - traceS'S): 0.2614678 
#> Effective number of parameters (model: traceS): 44.93259 
#> Effective degrees of freedom (model: traceS): 955.0674 
#> Sigma (model: traceS): 0.2590031 
#> Sigma (ML): 0.2531174 
#> AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 186.462 
#> AIC (GWR p. 96, eq. 4.22): 135.0056 
#> Residual sum of squares: 64.0684 
#> Quasi-global R2: 0.9813492

all(abs(model_GWR$residuals-model_spgwr$SDF@data$gwr.e)<0.00000000001)
#> [1] TRUE

Estimation of a GWR with leave-one out CV, using an adaptive bisquare kernel (based on 20 nearest neighbors) and a local Outlier Detection/Removal procedure:

model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq_knn'),H=20, Model = 'GWR',control=list(isgcv=TRUE,remove_local_outlier=TRUE,outv=0.01))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = NULL, kernels = c("bisq_knn"), H = 20, Model = "GWR", 
#>     control = list(isgcv = TRUE, remove_local_outlier = TRUE, 
#>         outv = 0.01))
#> Model: GWR 
#> Kernels function: bisq_knn 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -4.223718  0.001589 -1.182832 -2.0699
#> 1st Qu. -1.448438  0.347360  1.026678 -1.2974
#> Median  -0.773579  0.537315  1.660816 -1.0162
#> Mean    -0.797981  0.497172  1.665602 -1.0071
#> 3rd Qu. -0.173053  0.640754  2.456682 -0.7263
#> Max.     2.267588  0.911069  4.103331 -0.0933
#> Residual sum of squares: 16.54259

Estimation of a MGWR with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors) + leave-one out CV estimation


model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(SE=TRUE))
summary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = c("Intercept"), kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWR", control = list(SE = TRUE))
#> Model: MGWR 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Constant parameters: Intercept 
#> -0.5563375 
#> Varying parameters: X1 X2 X3 
#>              X1      X2      X3
#> Min.    0.17788 0.53761 -1.4822
#> 1st Qu. 0.37102 1.25611 -1.1637
#> Median  0.50101 1.62152 -1.0894
#> Mean    0.48714 1.62199 -1.0462
#> 3rd Qu. 0.58890 2.05394 -0.9554
#> Max.    0.82570 2.71433 -0.5656
#> Effective degrees of freedom: 924.9957 
#> AIC -272.9127 
#> Residual sum of squares: 41.38677
                       
                       
### leave-one out CV estimation
model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars='Intercept',kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(isgcv=TRUE,remove_local_outlier=FALSE,outv=NULL))
summary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = "Intercept", kernels = c("gauss_adapt"), H = 20, 
#>     Model = "MGWR", control = list(isgcv = TRUE, remove_local_outlier = FALSE, 
#>         outv = NULL))
#> Model: MGWR 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Constant parameters: Intercept 
#> -0.5836998 
#> Varying parameters: X1 X2 X3 
#>              X1      X2      X3
#> Min.    0.17826 0.53262 -1.4776
#> 1st Qu. 0.37341 1.27013 -1.1609
#> Median  0.50187 1.64459 -1.0867
#> Mean    0.48805 1.63952 -1.0424
#> 3rd Qu. 0.58608 2.07933 -0.9529
#> Max.    0.82377 2.72156 -0.5559
#> Residual sum of squares: 50.18494

Estimation of a MGWR with stationary intercept, using an adapative gaussian kernel (based on 20 nearest neighbors):

model_MGWRSAR_1_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(W=W,Lambdacor=TRUE,SE=TRUE))
summary_mgwrsar(model_MGWRSAR_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata, 
#>     coord = coord, fixed_vars = NULL, kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(W = W, Lambdacor = TRUE, 
#>         SE = TRUE))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: MGWRSAR_1_0_kv 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -8.94883  0.10555 -2.65846 -2.10631 -0.2844
#> 1st Qu.  -1.74523  0.37095  0.81483 -1.29394  0.1355
#> Median   -0.99141  0.54257  2.11968 -1.03264  0.2965
#> Mean     -1.06465  0.50832  2.18127 -1.07079  0.3152
#> 3rd Qu.  -0.21901  0.63831  3.55568 -0.73756  0.4899
#> Max.      2.20903  0.94188  9.84625 -0.34718  0.9153
#> Effective degrees of freedom: 893.3478 
#> AIC 2158.486 
#> Residual sum of squares: 456.0999

Estimation of a mgwrsar(0,kc,kv) model with stationary intercept and stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

model_MGWRSAR_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_0_kc_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata, 
#>     coord = coord, fixed_vars = c("Intercept"), kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWRSAR_0_kc_kv", control = list(W = W))
#> Model: MGWRSAR_0_kc_kv 
#> Method for spatial autocorrelation: MGWRSAR_0_kc_kv 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Constant parameters: Intercept  
#> -0.3140325 0.2254189 
#> Varying parameters: X1 X2 X3 
#>               X1       X2      X3
#> Min.     0.18113 -0.19560 -1.6156
#> 1st Qu.  0.38038  1.30055 -1.2737
#> Median   0.50396  1.92790 -1.1129
#> Mean     0.49373  1.93858 -1.0928
#> 3rd Qu.  0.59936  2.80034 -0.9463
#> Max.     0.86079  3.67151 -0.4367
#> Residual sum of squares: 122.6273

Estimation of a mgwrsar(0,0,kv) model with stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):

model_MGWRSAR_0_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_0_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_0_0_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_0_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_0_kv~X1+X2+X3", data = mydata, 
#>     coord = coord, fixed_vars = NULL, kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWRSAR_0_0_kv", control = list(W = W))
#> Model: MGWRSAR_0_0_kv 
#> Method for spatial autocorrelation: MGWRSAR_0_0_kv 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Constant parameters: 
#> 0.250141 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -4.928243  0.104342 -1.979095 -2.0943
#> 1st Qu. -1.429473  0.365563  1.262898 -1.2896
#> Median  -0.779578  0.536273  2.011070 -0.9916
#> Mean    -0.689637  0.502558  2.023040 -1.0512
#> 3rd Qu. -0.094623  0.630135  3.365939 -0.7410
#> Max.     2.327138  0.891468  5.401186 -0.3125
#> Residual sum of squares: 93.50372

Estimation of a mgwrsar(1,kv,kv) model with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors):

model_MGWRSAR_1_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_kc_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_1_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata, 
#>     coord = coord, fixed_vars = c("Intercept"), kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWRSAR_1_kc_kv", control = list(W = W))
#> Model: MGWRSAR_1_kc_kv 
#> Method for spatial autocorrelation: MGWRSAR_1_kc_kv 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 1000 
#> Constant parameters: Intercept 
#> -0.6698754 
#> Varying parameters: X1 X2 X3  
#>               X1       X2       X3  lambda
#> Min.     0.19982 -0.46764 -1.90291 -0.2478
#> 1st Qu.  0.41206  1.13441 -1.27422  0.1194
#> Median   0.52169  1.84084 -1.09489  0.2813
#> Mean     0.50659  1.96433 -1.09162  0.3263
#> 3rd Qu.  0.60433  2.86682 -0.88669  0.5122
#> Max.     0.83615  4.86693 -0.44568  1.2080
#> Residual sum of squares: 869.4819

Bootstrap test

mgwrsar_bootstrap_test function allows to perform a bootstrap test for Betas for mgwrsar class model (with parallel computation). Could be long, not run here.

model_GWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'GWR',control=list(SE=TRUE))
summary_mgwrsar(model_GWR)

model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(SE=TRUE))
summary_mgwrsar(model_MGWR)

test<-mgwrsar_bootstrap_test(model_MGWR,model_GWR,B=30,domc=FALSE,ncore=1,type='standard',eps='H1',df='H1',focal='median',D=NULL)

# result 
# > test
# > p_star   0
# > T 69.92265

Prediction

In this example, we use rows 1 to 800 as insample data for model estimation and rows 801 to 1000 as outsample for prediction.

Prediction of GWR model using sheppard kernel with 8 neighbors:

model_GWR_insample<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[1:800,],coord=coord[1:800,], fixed_vars=NULL,kernels=c('gauss_adapt'),H=8, Model = 'GWR',control=list())
Y_pred=predict_mgwrsar(model_GWR_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 8, kernel_extra = "sheppard")
head(Y_pred)
#> [1] 0.5111469 2.8323339 2.8674031 0.2058560 1.8421794 0.6162536
head(mydata$Y_gwr[801:1000])
#> [1] 0.2490662 2.8329445 2.7107657 0.1949003 1.7104271 0.6286141
sqrt(mean((mydata$Y_gwr[801:1000]-Y_pred)^2)) # RMSE
#> [1] 0.1343204

Prediction of MGWRSAR_1_0_kv model using sheppard kernel with 8 neighbors and Best Linear Unbiased Predictor (BLUP) :

model_MGWRSAR_1_0_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata[1:800,],coord=coord[1:800,], fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(W=W[1:800,1:800]))
summary_mgwrsar(model_MGWRSAR_1_0_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata[1:800, 
#>     ], coord = coord[1:800, ], fixed_vars = NULL, kernels = c("gauss_adapt"), 
#>     H = 20, Model = "MGWRSAR_1_0_kv", control = list(W = W[1:800, 
#>         1:800]))
#> Model: MGWRSAR_1_0_kv 
#> Method for spatial autocorrelation: MGWRSAR_1_0_kv 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 20 
#> Number of data points: 800 
#> Varying parameters: Intercept X1 X2 X3  
#>         Intercept       X1       X2       X3  lambda
#> Min.     -7.05380  0.10543 -2.35128 -2.19224 -0.5765
#> 1st Qu.  -1.92244  0.39116  0.91679 -1.33372  0.0956
#> Median   -0.89820  0.54991  2.10489 -1.00608  0.2728
#> Mean     -0.99662  0.51309  2.61515 -1.09223  0.2588
#> 3rd Qu.  -0.20394  0.64410  4.09003 -0.72127  0.4379
#> Max.      1.98675  0.84522  9.77120 -0.43915  1.2524
#> Residual sum of squares: 352.3345

## Using Best Linear Unbiased Predictor 
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 12,  W = W, type = "BPN", kernel_extra = "sheppard")
head(Y_pred)
#>           [,1]
#> [1,] 0.8833344
#> [2,] 5.5199667
#> [3,] 4.1304483
#> [4,] 1.1905310
#> [5,] 2.9239718
#> [6,] 1.8684993
head(mydata$Y_mgwrsar_1_0_kv[801:1000])
#> [1] 0.5400965 5.0848714 3.9300001 1.1680551 2.4675708 0.9905510
sqrt(mean((mydata$Y_mgwrsar_1_0_kv[801:1000]-Y_pred)^2))
#> [1] 8.442365

## without Best Linear Unbiased Predictor
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 12,  W = W, type = "TC", kernel_extra = "sheppard")
head(Y_pred)
#> [1] 1.435196 5.666258 4.219273 1.388306 3.459105 1.706876
head(mydata$Y_mgwrsar_1_0_kv[801:1000])
#> [1] 0.5400965 5.0848714 3.9300001 1.1680551 2.4675708 0.9905510
sqrt(mean((mydata$Y_mgwrsar_1_0_kv[801:1000]-Y_pred)^2))
#> [1] 54.01216

Optimal bandwidths search

In the following example, we show how to find optimal bandwidth by hand.


######################
#### Finding bandwidth by hand
#####################

model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq_knn'),H=50, Model = 'GWR',control=list(isgcv=FALSE,minv=1))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = NULL, kernels = c("bisq_knn"), H = 50, Model = "GWR", 
#>     control = list(isgcv = FALSE, minv = 1))
#> Model: GWR 
#> Kernels function: bisq_knn 
#> Kernels type: GD 
#> bandwidth: 50 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -2.963548  0.049927 -0.710652 -1.8541
#> 1st Qu. -1.506600  0.349613  1.126145 -1.2796
#> Median  -0.847094  0.546082  1.672279 -1.0021
#> Mean    -0.821191  0.496747  1.683377 -1.0047
#> 3rd Qu. -0.251085  0.627595  2.508024 -0.7218
#> Max.     1.554918  0.855126  3.595044 -0.1968
#> Residual sum of squares: 7.606539

myCV<-function(H){
  model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H, Model = 'GWR',control=list(isgcv=TRUE))
  cat('... Try h=',H,' ')
  model_GWR$SSR
}

res=optimize(myCV,upper=500,lower=10)
#> ... Try h= 197.1633  ... Try h= 312.8367  ... Try h= 125.6733  ... Try h= 81.49004  ... Try h= 54.18327  ... Try h= 37.30676  ... Try h= 26.87651  ... Try h= 20.43026  ... Try h= 16.44625  ... Try h= 13.984  ... Try h= 12.46225  ... Try h= 11.84349  ... Try h= 11.13934  ... Try h= 11.49141  ... Try h= 11.35693  ... Try h= 11.27382  ... Try h= 11.22245  ... Try h= 11.19071  ... Try h= 11.17109  ... Try h= 11.15896  ... Try h= 11.15147  ... Try h= 11.14683  ... Try h= 11.14397  ... Try h= 11.1422  ... Try h= 11.14111  ... Try h= 11.14043  ... Try h= 11.14001  ... Try h= 11.13976  ... Try h= 11.1396  ... Try h= 11.1395  ... Try h= 11.13944  ... Try h= 11.1394  ... Try h= 11.1394
res
#> $minimum
#> [1] 11.1394
#> 
#> $objective
#> [1] 15.32105


## model with optimal bandwith with adaptative gaussian kernel

model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=ceiling(res$minimum), Model = 'GWR',control=list(isgcv=FALSE))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, 
#>     fixed_vars = NULL, kernels = c("gauss_adapt"), H = ceiling(res$minimum), 
#>     Model = "GWR", control = list(isgcv = FALSE))
#> Model: GWR 
#> Kernels function: gauss_adapt 
#> Kernels type: GD 
#> bandwidth: 12 
#> Number of data points: 1000 
#> Varying parameters: Intercept X1 X2 X3 
#>         Intercept        X1        X2      X3
#> Min.    -3.053886  0.082189 -0.706409 -1.8197
#> 1st Qu. -1.500391  0.352994  1.184793 -1.2743
#> Median  -0.885585  0.541126  1.726578 -1.0048
#> Mean    -0.835642  0.498025  1.705481 -1.0081
#> 3rd Qu. -0.310028  0.624075  2.512837 -0.7302
#> Max.     1.519882  0.876498  3.423344 -0.1999
#> Residual sum of squares: 11.23341

mgwrsar package provides a wrapper that allows to find optimal bandwidth for a selction of model and kernel types. It is designed only for spatial kernel (type=‘GD’). It stores results for all models and kernels using an optimal bandwidth (leave-one out CV criteria) and provides both results with and without leave-one-out sampling. It also allows to find an optimal spatial weight matrix for a given kernel when the model includes spatial dependence.

Example of automatic search of optimal bandwidth for GWR and MGWR (stationnary intercept) models using either an adaptive gaussian kernel or adapative bisquare kernel or gaussian kernel, with at least 8 neigbhors for adaptive kernel and at least a bandwidth of 0.03 for gaussian kernel:

mytab<-bandwidths_mgwrsar(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept','X1'),Models=c('GWR','MGWR'),Kernels=c('bisq_knn','gauss_adapt','gauss'),control=list(),control_search=list(lower_d=8,lower_c=0.03,upper_c=0.65))
#> #####  GWR  #####
#> #####  bisq_knn  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel bisq_knn  with Model GWR 
#> Bandwidth h: Searching optimal bandwidth for kernel bisq_knn  with Model GWR   ... 
#> 8  . 11  . 15  . 15  . 17  . 19  . 19  . 20  . 21  .  opt$objective =  9.673904  opt$minimum =  22 
#> 
#> #####  gauss_adapt  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt  with Model GWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt  with Model GWR   ... 
#>  opt$objective =  12.51025  opt$minimum =  8 
#> 
#> #####  gauss  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss  with Model GWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss  with Model GWR   ... 
#>  opt$objective =  39.85757  opt$minimum =  0.06436968 
#> 
#> #####  MGWR  #####
#> #####  bisq_knn  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel bisq_knn  with Model MGWR 
#> Bandwidth h: Searching optimal bandwidth for kernel bisq_knn  with Model MGWR   ... 
#> 8  . 11  . 15  . 20  . 26  . 34  . 45  . 59  . 77  . 77  . 85  . 85  . 86  .  opt$objective =  202.2867  opt$minimum =  87 
#> 
#> #####  gauss_adapt  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt  with Model MGWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt  with Model MGWR   ... 
#> 8  . 11  . 11  . 13  . 13  .  opt$objective =  204.6507  opt$minimum =  14 
#> 
#> #####  gauss  #####
#> 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss  with Model MGWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss  with Model MGWR   ... 
#>  opt$objective =  212.2765  opt$minimum =  0.05796605

names(mytab)
#> [1] "GWR"    "MGWR"   "GWR_2"  "GWR_3"  "MGWR_2" "MGWR_3"
names(mytab[['GWR']])
#> [1] "config_model" "CV"           "SSR"          "model"

mytab[['GWR']]$config_model
#> [1] "GWR"      "bisq_knn" "22"       "unknow"   "0"
mytab[['GWR']]$CV
#> [1] 9.673904
summary(mytab[['GWR']]$model$Betav)
#>    Intercept             X1               X2                X3          
#>  Min.   :-3.6889   Min.   :0.0165   Min.   :-0.8843   Min.   :-2.12121  
#>  1st Qu.:-1.4614   1st Qu.:0.3526   1st Qu.: 1.0836   1st Qu.:-1.29786  
#>  Median :-0.8098   Median :0.5425   Median : 1.6525   Median :-1.00958  
#>  Mean   :-0.8239   Mean   :0.4992   Mean   : 1.6717   Mean   :-1.00068  
#>  3rd Qu.:-0.2008   3rd Qu.:0.6358   3rd Qu.: 2.4624   3rd Qu.:-0.71484  
#>  Max.   : 2.1706   Max.   :0.9191   Max.   : 3.7134   Max.   :-0.03991

mybestmodel=mytab[['GWR']]$model
plot_mgwrsar(mybestmodel,type='B_coef',var='X2')


mytab[['GWR_2']]$config_model
#> [1] "GWR"         "gauss_adapt" "8"           "unknow"      "0"
mytab[['GWR_2']]$CV
#> [1] 12.51025
summary(mytab[['GWR_2']]$model$Betav)
#>    Intercept             X1                X2                X3         
#>  Min.   :-3.1688   Min.   :0.06144   Min.   :-0.6861   Min.   :-1.8886  
#>  1st Qu.:-1.5131   1st Qu.:0.35222   1st Qu.: 1.1450   1st Qu.:-1.2755  
#>  Median :-0.8678   Median :0.54507   Median : 1.6688   Median :-1.0026  
#>  Mean   :-0.8315   Mean   :0.49891   Mean   : 1.6891   Mean   :-1.0046  
#>  3rd Qu.:-0.2661   3rd Qu.:0.62965   3rd Qu.: 2.5036   3rd Qu.:-0.7255  
#>  Max.   : 1.5521   Max.   :0.89125   Max.   : 3.4628   Max.   :-0.1717


mytab[['GWR_3']]$config_model
#> [1] "GWR"                "gauss"              "0.0643696843454385"
#> [4] "unknow"             "0"
mytab[['GWR_3']]$CV
#> [1] 39.85757
summary(mytab[['GWR_3']]$model$Betav)
#>    Intercept             X1               X2                X3         
#>  Min.   :-2.7185   Min.   :0.1238   Min.   :-0.6051   Min.   :-1.7655  
#>  1st Qu.:-1.4451   1st Qu.:0.3739   1st Qu.: 1.3691   1st Qu.:-1.2984  
#>  Median :-1.0062   Median :0.5281   Median : 1.8606   Median :-1.0098  
#>  Mean   :-0.8832   Mean   :0.4913   Mean   : 1.8101   Mean   :-1.0196  
#>  3rd Qu.:-0.4704   3rd Qu.:0.6096   3rd Qu.: 2.5611   3rd Qu.:-0.7361  
#>  Max.   : 1.3078   Max.   :0.7948   Max.   : 3.3787   Max.   :-0.2758

mytab[['MGWR']]$config_model
#> [1] "MGWR"     "bisq_knn" "87"       "unknow"   "0"
mytab[['MGWR']]$CV
#> [1] 202.2867
mytab[['MGWR']]$model$Betac
#>            [,1]
#> [1,] -0.8876265
#> [2,]  0.5144641
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR']]$model$Betav)
#>        X2               X3         
#>  Min.   :-0.573   Min.   :-1.8724  
#>  1st Qu.: 1.324   1st Qu.:-1.3358  
#>  Median : 1.792   Median :-1.0545  
#>  Mean   : 1.720   Mean   :-1.0386  
#>  3rd Qu.: 2.376   3rd Qu.:-0.7012  
#>  Max.   : 3.095   Max.   :-0.2616

mytab[['MGWR_2']]$config_model
#> [1] "MGWR"        "gauss_adapt" "14"          "unknow"      "0"
mytab[['MGWR_2']]$CV
#> [1] 204.6507
mytab[['MGWR_2']]$model$Betac
#>            [,1]
#> [1,] -0.8721188
#> [2,]  0.5133712
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR_2']]$model$Betav)
#>        X2                X3         
#>  Min.   :-0.4034   Min.   :-1.9930  
#>  1st Qu.: 1.3349   1st Qu.:-1.3451  
#>  Median : 1.7730   Median :-1.0574  
#>  Mean   : 1.7218   Mean   :-1.0445  
#>  3rd Qu.: 2.3732   3rd Qu.:-0.7093  
#>  Max.   : 3.1010   Max.   :-0.2071

mytab[['MGWR_3']]$config_model
#> [1] "MGWR"               "gauss"              "0.0579660493242724"
#> [4] "unknow"             "0"
mytab[['MGWR_3']]$CV
#> [1] 212.2765
mytab[['MGWR_3']]$model$Betac
#>            [,1]
#> [1,] -0.8692212
#> [2,]  0.5120081
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR_3']]$model$Betav)
#>        X2                X3         
#>  Min.   :-0.1776   Min.   :-1.7972  
#>  1st Qu.: 1.4283   1st Qu.:-1.3506  
#>  Median : 1.7957   Median :-1.0787  
#>  Mean   : 1.7658   Mean   :-1.0685  
#>  3rd Qu.: 2.3650   3rd Qu.:-0.7636  
#>  Max.   : 3.0353   Max.   :-0.3402

Example of automatic search of optimal bandwidth for MGWRSAR_0_kc_kv (stationnary intercept) model using an adaptive gaussian kernel + Automatic search of optimal spatial weigth matrix for SAR part of the model using either a bisquare or adpative bisquare kernel.

mytab2<-bandwidths_mgwrsar(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars='Intercept',Models=c('MGWRSAR_0_kc_kv'),Kernels=c('gauss_adapt'),control=list(),control_search=list(search_W=TRUE,kernels_w=c('bisq','bisq_knn')))
#> #####  MGWRSAR_0_kc_kv  #####
#> #####  gauss_adapt  #####
#> 
#> ########
#> Search W stage
#> ########
#> ########
#> Kernel candidate for W : bisq
#> ########Bandwidth h: Searching support for kernel gauss_adapt  with Model MGWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt  with Model MGWR   ... 
#> Bandwidth h_w for W: Searching optimal bandwidth for kernel bisq  with Model MGWRSAR_0_kc_kv   ... 
#> 0.03615245  . 0.04699818  . 0.04699818  . 0.051698  . 0.0568678  . 0.06255458  . 0.06255458  . 0.06318013  . 0.06381193  . 0.06445005  . 0.06509455  . 0.06574549  . 0.06640295  . 0.06640295  . .
#> ########
#> Kernel candidate for W : bisq_knn
#> ########Bandwidth h: Searching support for kernel gauss_adapt  with Model MGWR 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt  with Model MGWR   ... 
#> Bandwidth h_w for W: Searching optimal bandwidth for kernel bisq_knn  with Model MGWRSAR_0_kc_kv   ... 
#> 2  . 3  . 4  . 6  . 8  . 8  . 8  . 
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt  with Model MGWRSAR_0_kc_kv 
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt  with Model MGWRSAR_0_kc_kv   ... 
#>  opt$objective =  75.45094  opt$minimum =  9



mytab2[['MGWRSAR_0_kc_kv']]$config_model
#> [1] "MGWRSAR_0_kc_kv" "gauss_adapt"     "9"               "bisq_knn"       
#> [5] "9"
mytab2[['MGWRSAR_0_kc_kv']]$CV
#> [1] 75.45094
mytab2[['MGWRSAR_0_kc_kv']]$model$Betac
#>             [,1]
#> [1,] -0.01336184
#> [2,]  0.36366122
#> attr(,"names")
#> [1] "Intercept" "lambda"
summary(mytab2[['MGWRSAR_0_kc_kv']]$model$Betav)
#>        X1                X2                X3         
#>  Min.   :0.09648   Min.   :-0.6681   Min.   :-1.6610  
#>  1st Qu.:0.35373   1st Qu.: 0.8249   1st Qu.:-1.1632  
#>  Median :0.50542   Median : 1.5117   Median :-1.0515  
#>  Mean   :0.49206   Mean   : 1.4885   Mean   :-1.0412  
#>  3rd Qu.:0.61811   3rd Qu.: 2.3038   3rd Qu.:-0.9344  
#>  Max.   :0.93521   Max.   : 3.4529   Max.   :-0.4849

experimental General kernel Product functions

The package provides additional experimental functions that allow to estimate locally linear model with other dimensions than space. Using the control variable ‘type’, it is possible to add time in the kernel and a limited set of other variables (continuous or categorical). If several dimensions are involved in the kernel, a general product kernel is used and you need to provide a list of bandwidths and a list of kernel types. For time kernel, it uses asymetric kernel, eventually with a decay. For categorical variable, it uses the propositions of Li and racine (2007); see also np package. Optimization of bandwidths has to be done by yourself.

Note that when time or other additional variables are used for kernels, then two small bandwidths could lead to empty local subsamples. We recommend to use gauss and gauss_adapt kernels that suffering less this issue.

## space + time kernel
time=sort(rep(1:500,2))
time[1:150]
W=KNN(coord,8)
### Because only past neighbors are considered, bad choice of bandiwth leads to null weights for first obs ~ it could lead to NAs parameters.

model_MGWRSART_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','gauss_adapt'),H=c(50,50), Model = 'MGWRSAR_0_kc_kv',control=list(Z=time,W=W,Type='GDT'))
summary_mgwrsar(model_MGWRSART_0_kc_kv)


### space  + continuous variable kernel
model_MGWRSARX_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','gauss_adapt'),H=c(120,120), Model = 'MGWRSAR_0_kc_kv',control=list(Z=mydata$X2,W=W,Type='GDX'))
summary_mgwrsar(model_MGWRSARX_0_kc_kv)


### space + categorical kernel (Li and Racine 2010)
Z=as.numeric(mydata$X2>mean(mydata$X2))+1
head(Z)

model_MGWRSARC_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','li','li'),H=c(120,0.2,0.8), Model = 'MGWRSAR_0_kc_kv',control=list(Z=Z,W=W,Type='GDC'))
summary_mgwrsar(model_MGWRSARX_0_kc_kv)

Examples of basic uses of mgwrsar package

Ghislain Geniaux

2018-05-10

Introduction