Datos de la muestra
dat <- structure(list(location.id = structure(c(198L, 243L, 147L, 114L,152L, 117L, 151L, 129L, 148L, 154L, 233L, 12L,
226L, 129L, 146L,126L, 247L, 147L, 80L, 133L, 162L, 147L, 152L, 126L, 36L, 154L,147L, 166L, 132L, 241L,
146L, 152L, 141L, 227L, 41L, 187L, 131L,140L, 232L, 155L, 130L, 166L, 222L, 246L, 222L, 129L, 262L, 244L,
188L, 210L), .Label = c("11001", "11003", "11005", "11006","11007","11008", "14001", "14002", "15002",
"15010", "15012", "15014","15015", "15017", "15018", "15019","15021", "15022", "17001", "17002","17003",
"17004", "17005", "17006", "17007", "17008","21008", "21009","21011", "21012", "21013", "21014","21018",
"21019", "21020", "21021", "22001", "22002", "22003", "22005","22007", "22008", "22009", "22010","22012",
"29001", "29002","29003", "29007", "29023", "31001", "31002","31003", "31006","31011","31017","31018",
"31019", "31020", "31021", "31022","31023", "31024","31025","31026", "31027", "31029","31042","31043",
"31044", "31047", "31048", "31049", "31050", "31051","31054","31057", "31058", "31060","35001","35002",
"35003","35004", "35005", "35006", "35007", "35008","35009","35010","35011", "35012","35013","35014",
"35015", "35016", "35017", "35018", "35019", "35020","35021","35022", "35023", "35024","35025","35026", "35027", "35028", "35029", "35030", "35031",
"35032", "35034", "35035", "35036", "35037", "35038","35039","35040", "35041", "35042","35043","35044", "35046", "35048",
"35050", "41001", "41002", "41003", "41004", "41005","41006","41007", "41008", "41009","41010","41011", "41012", "41013",
"41014", "41015", "41016", "41017", "41018", "41019","41020","41021", "41022", "41023","41024","41025", "41026", "41027",
"41028", "41029", "41030", "41031", "41032", "41033","41034","41035", "41036", "41037","41038","41039", "42001", "42002",
"42003", "42004", "42005", "42006", "42007", "42009","42010","42011", "42014", "42019", "42020","43001", "43002", "43003",
"43004", "43005", "43006", "43007", "43008", "43009","43010","43011", "43012", "43013", "43014","43015", "43016", "43017",
"43018", "43019", "43020", "43021", "43022", "43023","43024","43025", "43026", "43027", "43028","43029", "43030", "43031",
"43032", "43033", "43034", "43035", "50001", "50002", "50003","50004", "50005", "50006", "50007","50008", "50009", "50010",
"50011", "51001", "51002", "51003", "51004", "51005", "51006","51007", "51008", "51009", "51010","51011", "51012", "51013",
"51014", "51015", "51016", "51017", "51018", "51019", "51020","51021", "51022", "52001", "52002", "52003", "52004", "52005",
"52006", "52007", "52008", "52009", "52010", "52011", "52012","52013", "52014", "52015", "52016", "52017", "52018", "53001"
), class = "factor"), year = structure(c(4L, 3L, 26L, 27L, 14L,19L, 13L, 19L, 9L, 21L, 4L, 20L, 27L, 17L, 25L, 23L, 3L, 13L,
10L, 22L, 27L, 27L, 21L, 15L, 26L, 27L, 15L, 18L, 21L, 28L, 22L, 17L, 17L, 26L, 8L, 26L, 13L, 13L, 2L, 19L, 17L, 12L, 5L, 17L,
27L, 4L, 5L, 16L, 28L, 26L), .Label = c("1987", "1988", "1989", "1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997",
"1998", "1999", "2000", "2001", "2002", "2003", "2004", "2005",
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013",
"2014"), class = "factor"), yield = c(731.07, 2264.99,3382.08, 2520.42, 3093.13, 3030.99,
2796.10, 2098.87, 2797.54, 3037.78,1980, 2720, 2966.98, 2799.68, 3290.9, 3199.72, 702.70, 2085.21,2396.69,
1344, 2975.39, 3475.36, 2918.5, 3470,2864.99, 2849.73, 3398.87, 359.45,
2579.98, 2849.49, 2323.80, 2345.07,2798.13, 2931.56, 1199.93, 3170.05,
1599.72, 1735.04, 1600.24, 2299.06,2472.64, 1682.35, 1599.93, 2675.57,
2595.38, 1989.35, 2182.74, 2883.3,3523.37, 2820.11), dhs.rep = c(0.11,
12.94, 6.1, 7.41, 0, 0.03, 0, 0, 0, 4.32, 5.61, 0, 0, 2.01, 0, 1.9, 2.85,
0.14, 0.27, 4.16, 0, 1.89, 2.75, 0.98, 0, 0, 0, 0.5, 6.48, 2.95, 0, 0,
3, 3.76, 0.15, 0, 0, 0, 0, 0, 2.87,1.83, 0, 6.02, 11.17, 1.15, 0, 12.33, 0, 3.55), nhs.rip = c(0,
37.47, 9.47, 21.6, 3.18, 16.08, 0, 0, 0, 0, 3.15, 12.01, 6.16, 0, 0, 34.92, 0.1, 2.58,
11.01, 4.91, 0, 19.07, 0, 9.54, 70.28, 2.2, 0.52, 2.16, 0.7,108.32, 0, 0, 1.08, 12.17, 13.73, 0, 4.7, 0.25, 17.15, 0, 0,
2.21, 0.78, 26.86, 11.62, 0.99, 0, 72.74, 0.42, 2.01), solar.veg = c(21.57, 20.22, 21.7, 23.2, 22.95, 22.14,
21.85, 21.95, 22.44, 22.07, 21.138, 17.37, 16.6, 22.88, 19.82, 22.83, 17.87, 22.11,
18.8, 19.97, 17.28, 24.81, 21.57, 21.04, 15.46, 24.49,19.55, 21.12, 19.41, 19.30,
18.34, 21.5, 20.94, 17.7, 16.95, 22.13, 21.72, 21.12, 17.235, 21.97, 23.154, 24.24, 21.42,
19.17, 23.3216, 19.11, 20.21, 20.66, 24.74, 24.95), elevation = c(232.53,487.04, 269.5, 357.94, 636.83,
400.53, 489.75, 585.67, 662.87, 641.73, 353.21, 337.01, 314.9, 421.94, 887.89, 346.45, 321.6,
269.5, 388.07, 386.98, 906.96, 503.98, 575.18, 421.59, 450.33, 597.56, 312.68, 401.59, 383.07, 124, 887.89, 687.08, 519.76,
527.22, 409.98, 601.31, 528.53, 420.48, 186.81, 858.06, 548.78,532.25, 362.1, 257.96, 362.1, 487.63, 805.64, 453.03, 410.2,
34.96)), row.names = c(6645L, 8041L, 4028L, 1999L, 4974L, 2063L,4932L, 2602L, 4409L, 5200L, 7747L, 78L, 7384L, 2734L, 3944L,
2255L, 8262L, 4021L, 1368L, 3334L, 5596L, 4286L, 5083L, 2258L, 542L, 5328L, 4153L, 5721L, 3244L, 7965L, 3948L, 4980L, 3778L,
7408L, 691L, 6367L, 3121L, 3630L, 7712L, 5370L, 2752L, 5765L, 7169L, 8238L, 7171L, 2621L, 8705L, 8169L, 6425L, 6847L), class = "data.frame")
Los datos tienen una variable de respuesta llamada "rendimiento" y predictores que son ciertas variables climáticas, elevación, location.id y año. Estoy intentando ejecutar la implementación de MARS desde el paquete earth siguiendo esta viñeta:
http://www.milbo.org/doc/earth-notes.pdf
En esta implementación, no quiero ejecutar las interacciones entre elevation
, location.id
y year
con cualquiera de las otras variables. En esta viñeta, hay un ejemplo de que si quiero excluir la interacción entre dos variables, puedo hacerlo de la siguiente manera;
PREDICTORS <- c("elevation","location.id","year") # these variables are not allowed to interact with any variables in PARENTS
PARENTS <- names(dat)[-3] # the third column is my response variables so I am removing it
no.int <- function(degree, pred, parents, namesx){
if (degree > 1) {
predictor <- namesx[pred]
parents <- namesx[parents != 0]
if((any(predictor %in% PREDICTORS) && any(parents %in% PARENTS)) ||
(any(predictor %in% PARENTS) && any(parents %in% PREDICTORS))) {
return(FALSE)
}
}
TRUE
}
library(earth)
fit.yld <- earth(x = dat[,PARENTS], y = dat[,3], keepxy = T, degree = 2, allowed = no.int,pmethod = "none")
summary(fit.yld)
Quiero probar interacciones de 2 vías por eso puse grado = 2. Sin embargo, quiero excluir la elevación, la ubicación y el año para interactuar entre sí, así como con cualquiera de las variables climáticas en el modelo. En la viñeta de la página 21, en el párrafo inferior dice que usando el argumento permitido: Evita que el predictor especificado se utilice en términos de interacción
Si ejecuto la función anterior, sigo viendo algunos términos en los que la ubicación y los años interactúan. Sin embargo, la función debería ser capaz de detener cualquier tipo de interacción entre estas dos variables.
Entiendo que este es un tema bastante específico con respecto a un paquete por lo que realmente agradecería un poco de ayuda.
Gracias