PhenoCam API and Data Fusion

Exploring the PhenoCam metadata

We can obtain an up-to-date table of the metadata of the entire PhenoCam network using the get_phenos() function. The returned value is a data.table, which simplifies further data exploration.

library(data.table)
library(phenocamapi)

# fetch the metadata of the entire PhenoCam network;
# get_phenos() returns it as a data.table
phenos <- get_phenos()

Filtering the dataset using attributes

For example, we can list just the sites that have a flux tower:

# keep only the sites flagged as having flux data and a non-missing flux
# site name, pairing each PhenoCam site with its flux site name
phenofluxsites <- phenos[
  flux_data == TRUE & !is.na(flux_sitenames),
  .(PhenoCam = site, Flux = flux_sitenames)
]

# preview the first rows
head(phenofluxsites)
#>                PhenoCam         Flux
#> 1:       alligatorriver       US-NC4
#> 2:        arscolesnorth         LTAR
#> 3:        arscolessouth         LTAR
#> 4: arsgreatbasinltar098       US-Rws
#> 5: arsgreatbasinltar177       US-Rms
#> 6:           arsmorris1 Unaffiliated

# site names of the deciduous broadleaf ('DB') sites that have flux data
DB.flux <- phenos[
  flux_data == TRUE & primary_veg_type == 'DB',
  site
]

# preview the first entries
head(DB.flux)
#> [1] "alligatorriver" "bartlett"       "bartlettir"     "bbc3"          
#> [5] "bbc4"           "bbc7"

Downloading PhenoCam time-series data

PhenoCam time-series are time-series data extracted from the regions of interest (ROIs) defined for a given site.

To download the phenological time-series from the PhenoCam, we need to know the sitename, vegetation type and ROI ID. This information can be obtained from the PhenoCam website or using the get_rois() function:

# fetch the table of every ROI available on the PhenoCam server
rois <- get_rois()

# preview the ROI names
head(rois[['roi_name']])
#> [1] "alligatorriver_DB_1000"   "arbutuslake_DB_1000"     
#> [3] "arbutuslakeinlet_DB_1000" "arbutuslakeinlet_EN_1000"
#> [5] "arbutuslakeinlet_EN_2000" "archboldavir_AG_1000"

# attributes recorded for each ROI
colnames(rois)
#>  [1] "roi_name"          "site"              "lat"              
#>  [4] "lon"               "roitype"           "active"           
#>  [7] "show_link"         "show_data_link"    "sequence_number"  
#> [10] "description"       "first_date"        "last_date"        
#> [13] "site_years"        "missing_data_pct"  "roi_page"         
#> [16] "roi_stats_file"    "one_day_summary"   "three_day_summary"
#> [19] "data_release"

# every ROI defined for the dukehw site
rois[site == 'dukehw']
#>          roi_name   site      lat       lon roitype active show_link
#> 1: dukehw_DB_1000 dukehw 35.97358 -79.10037      DB   TRUE      TRUE
#>    show_data_link sequence_number
#> 1:           TRUE            1000
#>                                      description first_date  last_date
#> 1: canopy level DB forest at awesome Duke forest 2013-06-01 2019-05-20
#>    site_years missing_data_pct
#> 1:        5.7              4.0
#>                                                                   roi_page
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000.html
#>                                                                     roi_stats_file
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_roistats.csv
#>                                                                one_day_summary
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_1day.csv
#>                                                              three_day_summary
#> 1: https://phenocam.sr.unh.edu/data/archive/dukehw/ROI/dukehw_DB_1000_3day.csv
#>    data_release
#> 1:          pre

The get_pheno_ts() function can download a time-series and return the result as a data.table. For example, to obtain the time-series for DB_1000 from the dukehw PhenoCam site, we can run the following code:

# download the 3-day time-series of the DB_1000 ROI at the dukehw site
dukehw_DB_1000 <- get_pheno_ts(
  site = 'dukehw',
  vegType = 'DB',
  roiID = 1000,
  type = '3day'
)

# columns available in the downloaded time-series
colnames(dukehw_DB_1000)
#>  [1] "date"                 "year"                 "doy"                 
#>  [4] "image_count"          "midday_filename"      "midday_r"            
#>  [7] "midday_g"             "midday_b"             "midday_gcc"          
#> [10] "midday_rcc"           "r_mean"               "r_std"               
#> [13] "g_mean"               "g_std"                "b_mean"              
#> [16] "b_std"                "gcc_mean"             "gcc_std"             
#> [19] "gcc_50"               "gcc_75"               "gcc_90"              
#> [22] "rcc_mean"             "rcc_std"              "rcc_50"              
#> [25] "rcc_75"               "rcc_90"               "max_solar_elev"      
#> [28] "snow_flag"            "outlierflag_gcc_mean" "outlierflag_gcc_50"  
#> [31] "outlierflag_gcc_75"   "outlierflag_gcc_90"   "YEAR"                
#> [34] "DOY"                  "YYYYMMDD"

# convert the date column to Date class, by reference
dukehw_DB_1000[, date := as.Date(date)]

# draw gcc_90 against date; the j-expression yields plot()'s NULL,
# which data.table then prints
dukehw_DB_1000[, plot(date, gcc_90, col = 'green', type = 'b')]
#> NULL

# bold title in the top margin of the plot
mtext('Duke Forest, Hardwood', font = 2)

Merge with other time-series such as flux data

In a fully programmatic setting, you can load the PhenoCam dataset, find the related flux data, load the flux data and merge everything together as follows:

# download the time-series of the EN_1000 ROI at the oregonMP site
phenots <- get_pheno_ts(
  site = 'oregonMP',
  vegType = 'EN',
  roiID = 1000
)

# columns available in the downloaded time-series
colnames(phenots)
#>  [1] "date"                 "year"                 "doy"                 
#>  [4] "image_count"          "midday_filename"      "midday_r"            
#>  [7] "midday_g"             "midday_b"             "midday_gcc"          
#> [10] "midday_rcc"           "r_mean"               "r_std"               
#> [13] "g_mean"               "g_std"                "b_mean"              
#> [16] "b_std"                "gcc_mean"             "gcc_std"             
#> [19] "gcc_50"               "gcc_75"               "gcc_90"              
#> [22] "rcc_mean"             "rcc_std"              "rcc_50"              
#> [25] "rcc_75"               "rcc_90"               "max_solar_elev"      
#> [28] "snow_flag"            "outlierflag_gcc_mean" "outlierflag_gcc_50"  
#> [31] "outlierflag_gcc_75"   "outlierflag_gcc_90"   "YEAR"                
#> [34] "DOY"                  "YYYYMMDD"

# Locate the example flux file bundled with the phenocamapi package.
# mustWork = TRUE stops right here if the file is not installed, instead of
# handing the empty path that system.file() would otherwise return to read.csv().
fluxfile <- system.file('fluxnetrepo/FLX_US-Me2/FLX_US-Me2_FULLSET_DD.csv', package = 'phenocamapi', mustWork = TRUE)

fluxts <- read.csv(fluxfile, skip = 0)

# treat the -9999 placeholder as missing
fluxts[fluxts == -9999] <- NA
fluxts <- as.data.table(fluxts)

# Parse the YYYYMMDD timestamps explicitly in UTC. as.Date() on a POSIXct
# converts in UTC by default, so parsing in the local time zone would shift
# YYYYMMDD back by one day in any zone ahead of UTC and make it disagree with
# TIMESTAMP, YEAR and DOY.
fluxts[, datetime := as.POSIXct(as.character(TIMESTAMP), format = '%Y%m%d', tz = 'UTC')]

# date keys with the same names as the YEAR, DOY and YYYYMMDD columns
# of the PhenoCam time-series
fluxts[, YYYYMMDD := as.character(as.Date(datetime))]
fluxts[, YEAR := year(datetime)]
fluxts[, DOY := yday(datetime)]

# preview the timestamp next to the TA_F column
head(fluxts[, .(TIMESTAMP, TA_F)])
#>    TIMESTAMP    TA_F
#> 1:  20141115 -10.105
#> 2:  20141116  -8.044
#> 3:  20141117  -4.550
#> 4:  20141118  -1.584
#> 5:  20141119  -1.805
#> 6:  20141120   4.019

PhenoCam API and Data Fusion

Bijan Seyednasrollah

2019-05-20

Exploring the PhenoCam metadata

Filtering the dataset using attributes

Downloading PhenoCam time-series data

Merge with other time-series such as flux data