mgwrsar package estimates linear and local linear models with spatial autocorrelation of the following forms:
\[y=\beta_v(u_i,v_i) X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (GWR)\]
\[y=\beta_c X_c+\beta_v(u_i,v_i) X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR)\]
\[y=\lambda Wy+\beta_c X_c+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,k,0))\]
\[y=\lambda Wy+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,0,k))\]
\[y=\lambda Wy+\beta_c X_c+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,k_c,k_v))\]
\[y=\lambda(u_i,v_i) Wy+\beta_c X_c+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,k,0))\]
\[y=\lambda(u_i,v_i)Wy+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,0,k))\]
\[y=\lambda(u_i,v_i)Wy+\beta_cX_c+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,k_c,k_v))\]
For more details on the estimation method, see Geniaux, G. and Martinetti, D. (2017). A new method for dealing simultaneously with spatial autocorrelation and spatial heterogeneity in regression models. Regional Science and Urban Economics. (https://doi.org/10.1016/j.regsciurbeco.2017.04.001)
The estimation of the previous model can be done using the MGWRSAR wrapper function which requires a dataframe and a matrix of coordinates. Note that:
In addition to the ability of considering spatial autocorrelation in GWR/MGWR like models, MGWRSAR function introduces several useful technics for estimating local regression with space coordinates:
The MGWRSAR function requires a dataframe and a matrix of coordinates, and eventually a spatial weight matrix if model include spatiale dependence. The package includes a simulated data example which is used for this vignette.
library(mgwrsar)
#> Loading required package: Rcpp
#> Loading required package: sp
#> Loading required package: leaflet
#> Loading required package: Matrix
## loading data example
data(data_mgwrsar)
coord=as.matrix(mydata[,c("x_lat","y_lon")])
## Creating a spatial weigth matrix (sparce dgCMatrix) of 8 nearest neighbors
W=KNN(coord,8)
Estimation of a GWR with a gauss kernel with and without parallel computation:
### with parallel computing
#ptm1<-proc.time()
#model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, #fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=TRUE,doMC=TRUE,ncore=4))
#(proc.time()-ptm1)[3]
### without parallel computing
ptm1<-proc.time()
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=TRUE,doMC=FALSE))
(proc.time()-ptm1)[3]
#> elapsed
#> 1.004
Summary and plot of mgwrsar object using leaflet:
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR",
#> control = list(SE = TRUE, doMC = FALSE))
#> Model: GWR
#> Kernels function: gauss
#> Kernels type: GD
#> bandwidth: 0.13
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -2.63740 0.16880 -0.37306 -1.7588
#> 1st Qu. -1.46100 0.38590 1.52729 -1.3070
#> Median -1.08812 0.51409 1.94806 -1.0194
#> Mean -0.97597 0.48879 1.94778 -1.0289
#> 3rd Qu. -0.62307 0.59454 2.58871 -0.7439
#> Max. 1.06100 0.79570 3.37755 -0.3083
#> Effective degrees of freedom: 955.0674
#> AIC 135.0056
#> Residual sum of squares: 64.0684
plot_mgwrsar(model_GWR,type='B_coef',var='X2')
plot_mgwrsar(model_GWR,type='t_coef',var='X2')
Estimation of a GWR with spgwr package:
library(spgwr)
#> Loading required package: spData
#> To access larger datasets in this package, install the spDataLarge
#> package with: `install.packages('spDataLarge')`
#> NOTE: This package does not constitute approval of GWR
#> as a method of spatial analysis; see example(gwr)
mydataSP=mydata
coordinates(mydataSP)=c('x_lat','y_lon')
ptm1<-proc.time()
model_spgwr <- gwr(Y_gwr~X1+X2+X3, data=mydataSP, bandwidth=0.13,hatmatrix=TRUE)
(proc.time()-ptm1)[3]
#> elapsed
#> 15.977
head(model_spgwr$SDF@data$gwr.e)
#> [1] -0.1585824 -0.1861144 -0.4450174 -0.1767796 -0.2014330 0.2670349
model_spgwr
#> Call:
#> gwr(formula = Y_gwr ~ X1 + X2 + X3, data = mydataSP, bandwidth = 0.13,
#> hatmatrix = TRUE)
#> Kernel function: gwr.Gauss
#> Fixed bandwidth: 0.13
#> Summary of GWR coefficient estimates at data points:
#> Min. 1st Qu. Median 3rd Qu. Max. Global
#> X.Intercept. -2.63740 -1.46100 -1.08812 -0.62307 1.06100 -1.4845
#> X1 0.16880 0.38590 0.51409 0.59454 0.79570 0.5000
#> X2 -0.37306 1.52729 1.94806 2.58871 3.37755 2.5481
#> X3 -1.75876 -1.30704 -1.01941 -0.74386 -0.30828 -1.0762
#> Number of data points: 1000
#> Effective number of parameters (residual: 2traceS - traceS'S): 62.85371
#> Effective degrees of freedom (residual: 2traceS - traceS'S): 937.1463
#> Sigma (residual: 2traceS - traceS'S): 0.2614678
#> Effective number of parameters (model: traceS): 44.93259
#> Effective degrees of freedom (model: traceS): 955.0674
#> Sigma (model: traceS): 0.2590031
#> Sigma (ML): 0.2531174
#> AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 186.462
#> AIC (GWR p. 96, eq. 4.22): 135.0056
#> Residual sum of squares: 64.0684
#> Quasi-global R2: 0.9813492
all(abs(model_GWR$residuals-model_spgwr$SDF@data$gwr.e)<0.00000000001)
#> [1] TRUE
Estimation of a GWR with leave-one out CV, using an adaptive bisquare kernel (based on 20 nearest neighbors) and a local Outlier Detection/Removal procedure:
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq_knn'),H=20, Model = 'GWR',control=list(isgcv=TRUE,remove_local_outlier=TRUE,outv=0.01))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("bisq_knn"), H = 20, Model = "GWR",
#> control = list(isgcv = TRUE, remove_local_outlier = TRUE,
#> outv = 0.01))
#> Model: GWR
#> Kernels function: bisq_knn
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -4.223718 0.001589 -1.182832 -2.0699
#> 1st Qu. -1.448438 0.347360 1.026678 -1.2974
#> Median -0.773579 0.537315 1.660816 -1.0162
#> Mean -0.797981 0.497172 1.665602 -1.0071
#> 3rd Qu. -0.173053 0.640754 2.456682 -0.7263
#> Max. 2.267588 0.911069 4.103331 -0.0933
#> Residual sum of squares: 16.54259
Estimation of a MGWR with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors) + leave-one out CV estimation
model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(SE=TRUE))
summary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = c("Intercept"), kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWR", control = list(SE = TRUE))
#> Model: MGWR
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Constant parameters: Intercept
#> -0.5563375
#> Varying parameters: X1 X2 X3
#> X1 X2 X3
#> Min. 0.17788 0.53761 -1.4822
#> 1st Qu. 0.37102 1.25611 -1.1637
#> Median 0.50101 1.62152 -1.0894
#> Mean 0.48714 1.62199 -1.0462
#> 3rd Qu. 0.58890 2.05394 -0.9554
#> Max. 0.82570 2.71433 -0.5656
#> Effective degrees of freedom: 924.9957
#> AIC -272.9127
#> Residual sum of squares: 41.38677
### leave-one out CV estimation
model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars='Intercept',kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(isgcv=TRUE,remove_local_outlier=FALSE,outv=NULL))
summary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = "Intercept", kernels = c("gauss_adapt"), H = 20,
#> Model = "MGWR", control = list(isgcv = TRUE, remove_local_outlier = FALSE,
#> outv = NULL))
#> Model: MGWR
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Constant parameters: Intercept
#> -0.5836998
#> Varying parameters: X1 X2 X3
#> X1 X2 X3
#> Min. 0.17826 0.53262 -1.4776
#> 1st Qu. 0.37341 1.27013 -1.1609
#> Median 0.50187 1.64459 -1.0867
#> Mean 0.48805 1.63952 -1.0424
#> 3rd Qu. 0.58608 2.07933 -0.9529
#> Max. 0.82377 2.72156 -0.5559
#> Residual sum of squares: 50.18494
Estimation of a MGWR with stationary intercept, using an adapative gaussian kernel (based on 20 nearest neighbors):
model_MGWRSAR_1_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(W=W,Lambdacor=TRUE,SE=TRUE))
summary_mgwrsar(model_MGWRSAR_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = NULL, kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWRSAR_1_0_kv", control = list(W = W, Lambdacor = TRUE,
#> SE = TRUE))
#> Model: MGWRSAR_1_0_kv
#> Method for spatial autocorrelation: MGWRSAR_1_0_kv
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3 lambda
#> Min. -8.94883 0.10555 -2.65846 -2.10631 -0.2844
#> 1st Qu. -1.74523 0.37095 0.81483 -1.29394 0.1355
#> Median -0.99141 0.54257 2.11968 -1.03264 0.2965
#> Mean -1.06465 0.50832 2.18127 -1.07079 0.3152
#> 3rd Qu. -0.21901 0.63831 3.55568 -0.73756 0.4899
#> Max. 2.20903 0.94188 9.84625 -0.34718 0.9153
#> Effective degrees of freedom: 893.3478
#> AIC 2158.486
#> Residual sum of squares: 456.0999
Estimation of a mgwrsar(0,kc,kv) model with stationary intercept and stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):
model_MGWRSAR_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_0_kc_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = c("Intercept"), kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWRSAR_0_kc_kv", control = list(W = W))
#> Model: MGWRSAR_0_kc_kv
#> Method for spatial autocorrelation: MGWRSAR_0_kc_kv
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Constant parameters: Intercept
#> -0.3140325 0.2254189
#> Varying parameters: X1 X2 X3
#> X1 X2 X3
#> Min. 0.18113 -0.19560 -1.6156
#> 1st Qu. 0.38038 1.30055 -1.2737
#> Median 0.50396 1.92790 -1.1129
#> Mean 0.49373 1.93858 -1.0928
#> 3rd Qu. 0.59936 2.80034 -0.9463
#> Max. 0.86079 3.67151 -0.4367
#> Residual sum of squares: 122.6273
Estimation of a mgwrsar(0,0,kv) model with stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):
model_MGWRSAR_0_0_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_0_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_0_0_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_0_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_0_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = NULL, kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWRSAR_0_0_kv", control = list(W = W))
#> Model: MGWRSAR_0_0_kv
#> Method for spatial autocorrelation: MGWRSAR_0_0_kv
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Constant parameters:
#> 0.250141
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -4.928243 0.104342 -1.979095 -2.0943
#> 1st Qu. -1.429473 0.365563 1.262898 -1.2896
#> Median -0.779578 0.536273 2.011070 -0.9916
#> Mean -0.689637 0.502558 2.023040 -1.0512
#> 3rd Qu. -0.094623 0.630135 3.365939 -0.7410
#> Max. 2.327138 0.891468 5.401186 -0.3125
#> Residual sum of squares: 93.50372
Estimation of a mgwrsar(1,kv,kv) model with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors):
model_MGWRSAR_1_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_kc_kv',control=list(W=W))
summary_mgwrsar(model_MGWRSAR_1_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = c("Intercept"), kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWRSAR_1_kc_kv", control = list(W = W))
#> Model: MGWRSAR_1_kc_kv
#> Method for spatial autocorrelation: MGWRSAR_1_kc_kv
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 1000
#> Constant parameters: Intercept
#> -0.6698754
#> Varying parameters: X1 X2 X3
#> X1 X2 X3 lambda
#> Min. 0.19982 -0.46764 -1.90291 -0.2478
#> 1st Qu. 0.41206 1.13441 -1.27422 0.1194
#> Median 0.52169 1.84084 -1.09489 0.2813
#> Mean 0.50659 1.96433 -1.09162 0.3263
#> 3rd Qu. 0.60433 2.86682 -0.88669 0.5122
#> Max. 0.83615 4.86693 -0.44568 1.2080
#> Residual sum of squares: 869.4819
mgwrsar_bootstrap_test function allows to perform a bootstrap test for Betas for mgwrsar class model (with parallel computation). Could be long, not run here.
model_GWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'GWR',control=list(SE=TRUE))
summary_mgwrsar(model_GWR)
model_MGWR<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt'),H=20, Model = 'MGWR',control=list(SE=TRUE))
summary_mgwrsar(model_MGWR)
test<-mgwrsar_bootstrap_test(model_MGWR,model_GWR,B=30,domc=FALSE,ncore=1,type='standard',eps='H1',df='H1',focal='median',D=NULL)
# result
# > test
# > p_star 0
# > T 69.92265
In this example, we use rows 1 to 800 as insample data for model estimation and rows 801 to 1000 as outsample for prediction.
Prediction of GWR model using sheppard kernel with 8 neighbors:
model_GWR_insample<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[1:800,],coord=coord[1:800,], fixed_vars=NULL,kernels=c('gauss_adapt'),H=8, Model = 'GWR',control=list())
Y_pred=predict_mgwrsar(model_GWR_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 8, kernel_extra = "sheppard")
head(Y_pred)
#> [1] 0.5111469 2.8323339 2.8674031 0.2058560 1.8421794 0.6162536
head(mydata$Y_gwr[801:1000])
#> [1] 0.2490662 2.8329445 2.7107657 0.1949003 1.7104271 0.6286141
sqrt(mean((mydata$Y_gwr[801:1000]-Y_pred)^2)) # RMSE
#> [1] 0.1343204
Prediction of MGWRSAR_1_0_kv model using sheppard kernel with 8 neighbors and Best Linear Unbiased Predictor (BLUP) :
model_MGWRSAR_1_0_kv_insample<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata[1:800,],coord=coord[1:800,], fixed_vars=NULL,kernels=c('gauss_adapt'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(W=W[1:800,1:800]))
summary_mgwrsar(model_MGWRSAR_1_0_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata[1:800,
#> ], coord = coord[1:800, ], fixed_vars = NULL, kernels = c("gauss_adapt"),
#> H = 20, Model = "MGWRSAR_1_0_kv", control = list(W = W[1:800,
#> 1:800]))
#> Model: MGWRSAR_1_0_kv
#> Method for spatial autocorrelation: MGWRSAR_1_0_kv
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 20
#> Number of data points: 800
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3 lambda
#> Min. -7.05380 0.10543 -2.35128 -2.19224 -0.5765
#> 1st Qu. -1.92244 0.39116 0.91679 -1.33372 0.0956
#> Median -0.89820 0.54991 2.10489 -1.00608 0.2728
#> Mean -0.99662 0.51309 2.61515 -1.09223 0.2588
#> 3rd Qu. -0.20394 0.64410 4.09003 -0.72127 0.4379
#> Max. 1.98675 0.84522 9.77120 -0.43915 1.2524
#> Residual sum of squares: 352.3345
## Using Best Linear Unbiased Predictor
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 12, W = W, type = "BPN", kernel_extra = "sheppard")
head(Y_pred)
#> [,1]
#> [1,] 0.8833344
#> [2,] 5.5199667
#> [3,] 4.1304483
#> [4,] 1.1905310
#> [5,] 2.9239718
#> [6,] 1.8684993
head(mydata$Y_mgwrsar_1_0_kv[801:1000])
#> [1] 0.5400965 5.0848714 3.9300001 1.1680551 2.4675708 0.9905510
sqrt(mean((mydata$Y_mgwrsar_1_0_kv[801:1000]-Y_pred)^2))
#> [1] 8.442365
## without Best Linear Unbiased Predictor
Y_pred=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=mydata[801:1000,], newdata_coord=coord[801:1000,], k_extra = 12, W = W, type = "TC", kernel_extra = "sheppard")
head(Y_pred)
#> [1] 1.435196 5.666258 4.219273 1.388306 3.459105 1.706876
head(mydata$Y_mgwrsar_1_0_kv[801:1000])
#> [1] 0.5400965 5.0848714 3.9300001 1.1680551 2.4675708 0.9905510
sqrt(mean((mydata$Y_mgwrsar_1_0_kv[801:1000]-Y_pred)^2))
#> [1] 54.01216
In the following example, we show how to find optimal bandwidth by hand.
######################
#### Finding bandwidth by hand
#####################
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq_knn'),H=50, Model = 'GWR',control=list(isgcv=FALSE,minv=1))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("bisq_knn"), H = 50, Model = "GWR",
#> control = list(isgcv = FALSE, minv = 1))
#> Model: GWR
#> Kernels function: bisq_knn
#> Kernels type: GD
#> bandwidth: 50
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -2.963548 0.049927 -0.710652 -1.8541
#> 1st Qu. -1.506600 0.349613 1.126145 -1.2796
#> Median -0.847094 0.546082 1.672279 -1.0021
#> Mean -0.821191 0.496747 1.683377 -1.0047
#> 3rd Qu. -0.251085 0.627595 2.508024 -0.7218
#> Max. 1.554918 0.855126 3.595044 -0.1968
#> Residual sum of squares: 7.606539
myCV<-function(H){
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H, Model = 'GWR',control=list(isgcv=TRUE))
cat('... Try h=',H,' ')
model_GWR$SSR
}
res=optimize(myCV,upper=500,lower=10)
#> ... Try h= 197.1633 ... Try h= 312.8367 ... Try h= 125.6733 ... Try h= 81.49004 ... Try h= 54.18327 ... Try h= 37.30676 ... Try h= 26.87651 ... Try h= 20.43026 ... Try h= 16.44625 ... Try h= 13.984 ... Try h= 12.46225 ... Try h= 11.84349 ... Try h= 11.13934 ... Try h= 11.49141 ... Try h= 11.35693 ... Try h= 11.27382 ... Try h= 11.22245 ... Try h= 11.19071 ... Try h= 11.17109 ... Try h= 11.15896 ... Try h= 11.15147 ... Try h= 11.14683 ... Try h= 11.14397 ... Try h= 11.1422 ... Try h= 11.14111 ... Try h= 11.14043 ... Try h= 11.14001 ... Try h= 11.13976 ... Try h= 11.1396 ... Try h= 11.1395 ... Try h= 11.13944 ... Try h= 11.1394 ... Try h= 11.1394
res
#> $minimum
#> [1] 11.1394
#>
#> $objective
#> [1] 15.32105
## model with optimal bandwith with adaptative gaussian kernel
model_GWR<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss_adapt'),H=ceiling(res$minimum), Model = 'GWR',control=list(isgcv=FALSE))
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss_adapt"), H = ceiling(res$minimum),
#> Model = "GWR", control = list(isgcv = FALSE))
#> Model: GWR
#> Kernels function: gauss_adapt
#> Kernels type: GD
#> bandwidth: 12
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -3.053886 0.082189 -0.706409 -1.8197
#> 1st Qu. -1.500391 0.352994 1.184793 -1.2743
#> Median -0.885585 0.541126 1.726578 -1.0048
#> Mean -0.835642 0.498025 1.705481 -1.0081
#> 3rd Qu. -0.310028 0.624075 2.512837 -0.7302
#> Max. 1.519882 0.876498 3.423344 -0.1999
#> Residual sum of squares: 11.23341
mgwrsar package provides a wrapper that allows to find optimal bandwidth for a selction of model and kernel types. It is designed only for spatial kernel (type=‘GD’). It stores results for all models and kernels using an optimal bandwidth (leave-one out CV criteria) and provides both results with and without leave-one-out sampling. It also allows to find an optimal spatial weight matrix for a given kernel when the model includes spatial dependence.
Example of automatic search of optimal bandwidth for GWR and MGWR (stationnary intercept) models using either an adaptive gaussian kernel or adapative bisquare kernel or gaussian kernel, with at least 8 neigbhors for adaptive kernel and at least a bandwidth of 0.03 for gaussian kernel:
mytab<-bandwidths_mgwrsar(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept','X1'),Models=c('GWR','MGWR'),Kernels=c('bisq_knn','gauss_adapt','gauss'),control=list(),control_search=list(lower_d=8,lower_c=0.03,upper_c=0.65))
#> ##### GWR #####
#> ##### bisq_knn #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel bisq_knn with Model GWR
#> Bandwidth h: Searching optimal bandwidth for kernel bisq_knn with Model GWR ...
#> 8 . 11 . 15 . 15 . 17 . 19 . 19 . 20 . 21 . opt$objective = 9.673904 opt$minimum = 22
#>
#> ##### gauss_adapt #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt with Model GWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt with Model GWR ...
#> opt$objective = 12.51025 opt$minimum = 8
#>
#> ##### gauss #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss with Model GWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss with Model GWR ...
#> opt$objective = 39.85757 opt$minimum = 0.06436968
#>
#> ##### MGWR #####
#> ##### bisq_knn #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel bisq_knn with Model MGWR
#> Bandwidth h: Searching optimal bandwidth for kernel bisq_knn with Model MGWR ...
#> 8 . 11 . 15 . 20 . 26 . 34 . 45 . 59 . 77 . 77 . 85 . 85 . 86 . opt$objective = 202.2867 opt$minimum = 87
#>
#> ##### gauss_adapt #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt with Model MGWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt with Model MGWR ...
#> 8 . 11 . 11 . 13 . 13 . opt$objective = 204.6507 opt$minimum = 14
#>
#> ##### gauss #####
#>
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss with Model MGWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss with Model MGWR ...
#> opt$objective = 212.2765 opt$minimum = 0.05796605
names(mytab)
#> [1] "GWR" "MGWR" "GWR_2" "GWR_3" "MGWR_2" "MGWR_3"
names(mytab[['GWR']])
#> [1] "config_model" "CV" "SSR" "model"
mytab[['GWR']]$config_model
#> [1] "GWR" "bisq_knn" "22" "unknow" "0"
mytab[['GWR']]$CV
#> [1] 9.673904
summary(mytab[['GWR']]$model$Betav)
#> Intercept X1 X2 X3
#> Min. :-3.6889 Min. :0.0165 Min. :-0.8843 Min. :-2.12121
#> 1st Qu.:-1.4614 1st Qu.:0.3526 1st Qu.: 1.0836 1st Qu.:-1.29786
#> Median :-0.8098 Median :0.5425 Median : 1.6525 Median :-1.00958
#> Mean :-0.8239 Mean :0.4992 Mean : 1.6717 Mean :-1.00068
#> 3rd Qu.:-0.2008 3rd Qu.:0.6358 3rd Qu.: 2.4624 3rd Qu.:-0.71484
#> Max. : 2.1706 Max. :0.9191 Max. : 3.7134 Max. :-0.03991
mybestmodel=mytab[['GWR']]$model
plot_mgwrsar(mybestmodel,type='B_coef',var='X2')
mytab[['GWR_2']]$config_model
#> [1] "GWR" "gauss_adapt" "8" "unknow" "0"
mytab[['GWR_2']]$CV
#> [1] 12.51025
summary(mytab[['GWR_2']]$model$Betav)
#> Intercept X1 X2 X3
#> Min. :-3.1688 Min. :0.06144 Min. :-0.6861 Min. :-1.8886
#> 1st Qu.:-1.5131 1st Qu.:0.35222 1st Qu.: 1.1450 1st Qu.:-1.2755
#> Median :-0.8678 Median :0.54507 Median : 1.6688 Median :-1.0026
#> Mean :-0.8315 Mean :0.49891 Mean : 1.6891 Mean :-1.0046
#> 3rd Qu.:-0.2661 3rd Qu.:0.62965 3rd Qu.: 2.5036 3rd Qu.:-0.7255
#> Max. : 1.5521 Max. :0.89125 Max. : 3.4628 Max. :-0.1717
mytab[['GWR_3']]$config_model
#> [1] "GWR" "gauss" "0.0643696843454385"
#> [4] "unknow" "0"
mytab[['GWR_3']]$CV
#> [1] 39.85757
summary(mytab[['GWR_3']]$model$Betav)
#> Intercept X1 X2 X3
#> Min. :-2.7185 Min. :0.1238 Min. :-0.6051 Min. :-1.7655
#> 1st Qu.:-1.4451 1st Qu.:0.3739 1st Qu.: 1.3691 1st Qu.:-1.2984
#> Median :-1.0062 Median :0.5281 Median : 1.8606 Median :-1.0098
#> Mean :-0.8832 Mean :0.4913 Mean : 1.8101 Mean :-1.0196
#> 3rd Qu.:-0.4704 3rd Qu.:0.6096 3rd Qu.: 2.5611 3rd Qu.:-0.7361
#> Max. : 1.3078 Max. :0.7948 Max. : 3.3787 Max. :-0.2758
mytab[['MGWR']]$config_model
#> [1] "MGWR" "bisq_knn" "87" "unknow" "0"
mytab[['MGWR']]$CV
#> [1] 202.2867
mytab[['MGWR']]$model$Betac
#> [,1]
#> [1,] -0.8876265
#> [2,] 0.5144641
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR']]$model$Betav)
#> X2 X3
#> Min. :-0.573 Min. :-1.8724
#> 1st Qu.: 1.324 1st Qu.:-1.3358
#> Median : 1.792 Median :-1.0545
#> Mean : 1.720 Mean :-1.0386
#> 3rd Qu.: 2.376 3rd Qu.:-0.7012
#> Max. : 3.095 Max. :-0.2616
mytab[['MGWR_2']]$config_model
#> [1] "MGWR" "gauss_adapt" "14" "unknow" "0"
mytab[['MGWR_2']]$CV
#> [1] 204.6507
mytab[['MGWR_2']]$model$Betac
#> [,1]
#> [1,] -0.8721188
#> [2,] 0.5133712
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR_2']]$model$Betav)
#> X2 X3
#> Min. :-0.4034 Min. :-1.9930
#> 1st Qu.: 1.3349 1st Qu.:-1.3451
#> Median : 1.7730 Median :-1.0574
#> Mean : 1.7218 Mean :-1.0445
#> 3rd Qu.: 2.3732 3rd Qu.:-0.7093
#> Max. : 3.1010 Max. :-0.2071
mytab[['MGWR_3']]$config_model
#> [1] "MGWR" "gauss" "0.0579660493242724"
#> [4] "unknow" "0"
mytab[['MGWR_3']]$CV
#> [1] 212.2765
mytab[['MGWR_3']]$model$Betac
#> [,1]
#> [1,] -0.8692212
#> [2,] 0.5120081
#> attr(,"names")
#> [1] "Intercept" "X1"
summary(mytab[['MGWR_3']]$model$Betav)
#> X2 X3
#> Min. :-0.1776 Min. :-1.7972
#> 1st Qu.: 1.4283 1st Qu.:-1.3506
#> Median : 1.7957 Median :-1.0787
#> Mean : 1.7658 Mean :-1.0685
#> 3rd Qu.: 2.3650 3rd Qu.:-0.7636
#> Max. : 3.0353 Max. :-0.3402
Example of automatic search of optimal bandwidth for MGWRSAR_0_kc_kv (stationnary intercept) model using an adaptive gaussian kernel + Automatic search of optimal spatial weigth matrix for SAR part of the model using either a bisquare or adpative bisquare kernel.
mytab2<-bandwidths_mgwrsar(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars='Intercept',Models=c('MGWRSAR_0_kc_kv'),Kernels=c('gauss_adapt'),control=list(),control_search=list(search_W=TRUE,kernels_w=c('bisq','bisq_knn')))
#> ##### MGWRSAR_0_kc_kv #####
#> ##### gauss_adapt #####
#>
#> ########
#> Search W stage
#> ########
#> ########
#> Kernel candidate for W : bisq
#> ########Bandwidth h: Searching support for kernel gauss_adapt with Model MGWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt with Model MGWR ...
#> Bandwidth h_w for W: Searching optimal bandwidth for kernel bisq with Model MGWRSAR_0_kc_kv ...
#> 0.03615245 . 0.04699818 . 0.04699818 . 0.051698 . 0.0568678 . 0.06255458 . 0.06255458 . 0.06318013 . 0.06381193 . 0.06445005 . 0.06509455 . 0.06574549 . 0.06640295 . 0.06640295 . .
#> ########
#> Kernel candidate for W : bisq_knn
#> ########Bandwidth h: Searching support for kernel gauss_adapt with Model MGWR
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt with Model MGWR ...
#> Bandwidth h_w for W: Searching optimal bandwidth for kernel bisq_knn with Model MGWRSAR_0_kc_kv ...
#> 2 . 3 . 4 . 6 . 8 . 8 . 8 .
#> ########
#> Search bandwidth stage
#> ########
#> Bandwidth h: Searching support for kernel gauss_adapt with Model MGWRSAR_0_kc_kv
#> Bandwidth h: Searching optimal bandwidth for kernel gauss_adapt with Model MGWRSAR_0_kc_kv ...
#> opt$objective = 75.45094 opt$minimum = 9
mytab2[['MGWRSAR_0_kc_kv']]$config_model
#> [1] "MGWRSAR_0_kc_kv" "gauss_adapt" "9" "bisq_knn"
#> [5] "9"
mytab2[['MGWRSAR_0_kc_kv']]$CV
#> [1] 75.45094
mytab2[['MGWRSAR_0_kc_kv']]$model$Betac
#> [,1]
#> [1,] -0.01336184
#> [2,] 0.36366122
#> attr(,"names")
#> [1] "Intercept" "lambda"
summary(mytab2[['MGWRSAR_0_kc_kv']]$model$Betav)
#> X1 X2 X3
#> Min. :0.09648 Min. :-0.6681 Min. :-1.6610
#> 1st Qu.:0.35373 1st Qu.: 0.8249 1st Qu.:-1.1632
#> Median :0.50542 Median : 1.5117 Median :-1.0515
#> Mean :0.49206 Mean : 1.4885 Mean :-1.0412
#> 3rd Qu.:0.61811 3rd Qu.: 2.3038 3rd Qu.:-0.9344
#> Max. :0.93521 Max. : 3.4529 Max. :-0.4849
The package provides additional experimental functions that allow to estimate locally linear model with other dimensions than space. Using the control variable ‘type’, it is possible to add time in the kernel and a limited set of other variables (continuous or categorical). If several dimensions are involved in the kernel, a general product kernel is used and you need to provide a list of bandwidths and a list of kernel types. For time kernel, it uses asymetric kernel, eventually with a decay. For categorical variable, it uses the propositions of Li and racine (2007); see also np package. Optimization of bandwidths has to be done by yourself.
Note that when time or other additional variables are used for kernels, then two small bandwidths could lead to empty local subsamples. We recommend to use gauss and gauss_adapt kernels that suffering less this issue.
## space + time kernel
time=sort(rep(1:500,2))
time[1:150]
W=KNN(coord,8)
### Because only past neighbors are considered, bad choice of bandiwth leads to null weights for first obs ~ it could lead to NAs parameters.
model_MGWRSART_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','gauss_adapt'),H=c(50,50), Model = 'MGWRSAR_0_kc_kv',control=list(Z=time,W=W,Type='GDT'))
summary_mgwrsar(model_MGWRSART_0_kc_kv)
### space + continuous variable kernel
model_MGWRSARX_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','gauss_adapt'),H=c(120,120), Model = 'MGWRSAR_0_kc_kv',control=list(Z=mydata$X2,W=W,Type='GDX'))
summary_mgwrsar(model_MGWRSARX_0_kc_kv)
### space + categorical kernel (Li and Racine 2010)
Z=as.numeric(mydata$X2>mean(mydata$X2))+1
head(Z)
model_MGWRSARC_0_kc_kv<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss_adapt','li','li'),H=c(120,0.2,0.8), Model = 'MGWRSAR_0_kc_kv',control=list(Z=Z,W=W,Type='GDC'))
summary_mgwrsar(model_MGWRSARX_0_kc_kv)