He arreglado tu código. El principal problema es tu definición de las funciones hinge y d_hinge. Éstas deberían aplicarse a una muestra a la vez. En cambio, tu definición agrega todas las muestras antes de tomar el máximo.
# Run standard gradient descent on a small built-in data set.
#
# Args:
#   fw:  per-sample loss, called as fw(w, xr, yr) with one row of x and the
#        matching label.
#   dfw: gradient of the total loss, called as dfw(w, x, y) on the whole data.
#   n:   number of weight updates to perform (0 means "report only").
#   lr:  learning rate multiplied into every gradient step.
#
# Prints the summed loss and the scores x %*% w once before the first update
# and once after every update. Returns the final weights invisibly.
gradient_descent <- function(fw, dfw, n, lr = 0.01) {
  # Data to be used: four samples with labels +1/-1. The first column of x is
  # the constant 1 (presumably an intercept term).
  x <- t(matrix(c(1, 3, 6, 1, 4, 2, 1, 5, 4, 1, 6, 1), nrow = 3))
  y <- t(t(c(1, 1, -1, -1)))
  w <- matrix(0, nrow = ncol(x))

  # The row-wise split does not depend on w, so compute it once.
  x_rows <- split(x, row(x))
  y_rows <- split(y, row(y))

  # Print the summed per-sample loss and the current scores for weights `w`.
  report <- function(w) {
    total_loss <- sum(mapply(function(xr, yr) fw(w, xr, yr), x_rows, y_rows))
    print(sprintf("loss: %f,x.w: %s", total_loss, paste(x %*% w, collapse = ",")))
  }

  report(w)

  # Update the weights 'n' times. seq_len() (unlike 1:n) yields an empty
  # sequence for n = 0, so no update is performed in that case.
  for (i in seq_len(n)) {
    w <- w - lr * dfw(w, x, y)
    report(w)
  }

  invisible(w)
}
# Hinge loss of a single sample.
#
# Args:
#   w:  weights (vector or one-column matrix) conformable with xr.
#   xr: feature vector of one sample.
#   yr: label of that sample (the caller in this file uses +1 / -1).
#
# Returns max(0, 1 - yr * <xr, w>) as a plain number.
hinge <- function(w, xr, yr) {
  # %*% binds tighter than *, so this is yr * (xr %*% w).
  margin <- yr * xr %*% w
  max(1 - margin, 0)
}
# (Sub)gradient of the summed hinge loss with respect to the weights.
#
# Args:
#   w: weights (vector or one-column matrix) with ncol(x) entries.
#   x: numeric matrix, one sample per row.
#   y: labels, one per row of x (vector or one-column matrix).
#
# Returns an unnamed numeric vector of length ncol(x): the sum over samples of
# -y_i * x_i, restricted to samples whose margin y_i * <x_i, w> is below 1.
# Samples with margin >= 1 contribute nothing.
d_hinge <- function(w, x, y) {
  # Work on plain vectors so no length-one array is recycled against a longer
  # vector (deprecated in R) and so a single-column x is handled as well.
  y <- as.vector(y)
  margins <- y * as.vector(x %*% w)
  active <- margins < 1

  # t(x) %*% (y * active) sums y_i * x_i over the active samples only.
  dw <- -as.vector(crossprod(x, y * active))
  dw
}
# Train with the hinge loss: 100 updates at a learning rate of 0.01.
gradient_descent(fw = hinge, dfw = d_hinge, n = 100, lr = 0.01)
Necesité n=10000 iteraciones para que convergiera; la llamada de arriba usa solo n=100.
[1] "loss: 0.090000,x.w: 1.08999999999995,0.909999999999905,-1.19000000000008,-1.69000000000011"
[1] "loss: 0.100000,x.w: 1.33999999999995,1.1199999999999,-0.900000000000075,-1.42000000000011"
[1] "loss: 0.230000,x.w: 0.939999999999948,0.829999999999905,-1.32000000000007,-1.77000000000011"
[1] "loss: 0.370000,x.w: 1.64999999999995,1.2899999999999,-0.630000000000075,-1.25000000000011"
[1] "loss: 0.000000,x.w: 1.24999999999995,0.999999999999905,-1.05000000000008,-1.60000000000011"
[1] "loss: 0.240000,x.w: 1.49999999999995,1.2099999999999,-0.760000000000075,-1.33000000000011"
[1] "loss: 0.080000,x.w: 1.09999999999995,0.919999999999905,-1.18000000000007,-1.68000000000011"
[1] "loss: 0.110000,x.w: 1.34999999999995,1.1299999999999,-0.890000000000075,-1.41000000000011"
[1] "loss: 0.210000,x.w: 0.949999999999948,0.839999999999905,-1.31000000000007,-1.76000000000011"
[1] "loss: 0.380000,x.w: 1.65999999999995,1.2999999999999,-0.620000000000074,-1.24000000000011"
[1] "loss: 0.000000,x.w: 1.25999999999995,1.0099999999999,-1.04000000000008,-1.59000000000011"
[1] "loss: 0.000000,x.w: 1.25999999999995,1.0099999999999,-1.04000000000008,-1.59000000000011"