#ifndef FL2C0_optimsvd
#include <profile1.c>
#include <FL2/C0/problem.c>
#include <svd.c>
#include <lset.c>
#include <rnd/cu.c>
#include <rnd/frnd.c>

// Builds one trial input vector per (lambda, step multiple) pair and evaluates them all.
// For each lambda l[n] a step c is obtained from svdsolve_inner(s,u,v,o,l[n]); the trial
// stored in row n*mulm+i is a - c*muls[i].
// *paa and *pba receive newly allocated array_2d's (lm*mulm rows each) holding the trial
// inputs and the outputs filled in by FL2Problem_evals; the caller frees both.
void FL2Problem_svd_trylambdas_results(FL2Problem *p,
	const REAL *a,const REAL *o, // Current position input/output vectors
	const REAL *s,const REAL * const *u,const REAL * const *v, // SVD of the response matrix
	const REAL *l,const int lm, // lambdas to try
	const REAL *muls,const int mulm, // step multiples to try
	REAL ***paa,REAL ***pba) // Pointer to arrays of inputs/outputs
{
	const int am=p->am;
	*paa=array_2d(lm*mulm,am);
	*pba=array_2d(lm*mulm,p->bm);
	for (int li=lm-1;li>=0;li--)
	{
		// One solve per lambda, shared by all step multiples
		REAL *step=svdsolve_inner(s,u,v,o,l[li]);
		for (int mi=mulm-1;mi>=0;mi--)
		{
			const REAL mul=muls[mi];
			REAL *row=(*paa)[li*mulm+mi];
			for (int k=am-1;k>=0;k--) row[k]=a[k]-step[k]*mul;
		}
		array_1d_free(step);
	}
	FL2Problem_evals(p,*paa,*pba,lm*mulm);
}

// Plots, for ms sample sets, the error decrease (relative to the error norm of o) against
// log10 of the distance of each sample input from a. Each set j is drawn first as a dimmed
// line strip and then as individual pixels in colour cs[j] (0xRRGGBB).
// Does nothing unless SBCLIB_GRAPHICS is defined.
void range_decrease_graphs(REAL ***aas,REAL ***bas,const int ms,const REAL *a,const REAL *o,
	const unsigned *cs)
{
#ifdef SBCLIB_GRAPHICS
	int set,pt,k;
	int am=array_2d_ys(aas[0]),bm=array_2d_ys(bas[0]);
	REAL eref=FL2Problem_errornorm(o,bm);
	// Largest decrease over all sets, used to scale the vertical axis
	REAL emax=1e-20;
	for (set=0;set<ms;set++)
	{
		for (pt=0;pt<array_2d_xs(aas[set]);pt++)
			emax=Max(emax,eref-FL2Problem_errornorm(bas[set][pt],bm));
	}
	// Line strips in 40% brightness of each set's colour
	for (set=0;set<ms;set++)
	{
		glBegin(GL_LINE_STRIP);
		glColor3f(0.4*(cs[set]>>16)/255.0,0.4*((cs[set]>>8)&0xFF)/255.0,0.4*(cs[set]&0xFF)/255.0);
		for (pt=0;pt<array_2d_xs(aas[set]);pt++)
		{
			REAL d=0;
			for (k=0;k<am;k++) d+=sq(aas[set][pt][k]-a[k]);
			d=sqrt(d);
			REAL e=eref-FL2Problem_errornorm(bas[set][pt],bm);
			glVertex2f(xres*3/4+log10(d)*xres/30,yres/4+e/emax*yres/4);
		}
		glEnd();
	}
	// Sample points on top, in full colour
	for (set=0;set<ms;set++)
	{
		for (pt=0;pt<array_2d_xs(aas[set]);pt++)
		{
			REAL d=0;
			for (k=0;k<am;k++) d+=sq(aas[set][pt][k]-a[k]);
			d=sqrt(d);
			REAL e=eref-FL2Problem_errornorm(bas[set][pt],bm);
			putpix(xres*3/4+log10(d)*xres/30,yres/4+e/emax*yres/4,cs[set]);
		}
	}
#endif
}
//void range_decrease_graph(REAL **aa,REAL **ba,const REAL *a,const REAL *o,
//	const unsigned c=GFX_white) {range_decrease_graphs(&aa,&ba,1,a,o,&c);}

// Saves a set of sample points to two CSV files in the current directory:
//   range_decrease_<filetag>rel.csv - each row holds |aa[n]-a|, the components of aa[n]-a,
//                                     errornorm(ba[n])-errornorm(o) and the components of ba[n]-o
//   range_decrease_<filetag>abs.csv - each row holds |aa[n]|, the components of aa[n],
//                                     errornorm(ba[n]) and the components of ba[n]
// aa/ba are array_2d's of sample inputs/outputs (one row per sample), a/o the reference
// input/output vectors. If a file cannot be opened the error is reported with perror and
// that file is skipped (previously a NULL FILE* was passed to fprintf).
void range_decrease_save(REAL **aa,REAL **ba,const REAL *a,const REAL *o,const char *filetag="")
{
	int n,i,m=array_2d_xs(aa),am=array_2d_ys(aa),bm=array_2d_ys(ba); char file[200];
	FILE *out;
	// Relative values
	snprintf(file,sizeof(file),"range_decrease_%srel.csv",filetag); // Bounded: filetag is caller-supplied
	out=fopen(file,"wt");
	if (!out) perror(file);
	else
	{
		fprintf(out,"|delta a|");
		for (n=0;n<am;n++) fprintf(out,",delta a[%d]",n);
		fprintf(out,",delta |b|");
		for (n=0;n<bm;n++) fprintf(out,",delta b[%d]",n);
		fprintf(out,"\n");
		REAL eref=FL2Problem_errornorm(o,bm);
		for (n=0;n<m;n++)
		{
			REAL dd=0;
			for (i=0;i<am;i++) dd+=sq(aa[n][i]-a[i]);
			fprintf(out,"%.16lg",sqrt(dd));
			for (i=0;i<am;i++) fprintf(out,",%.16lg",aa[n][i]-a[i]);
			REAL e=FL2Problem_errornorm(ba[n],bm);
			fprintf(out,",%.16lg",e-eref);
			for (i=0;i<bm;i++) fprintf(out,",%.16lg",ba[n][i]-o[i]);
			fprintf(out,"\n");
		}
		fclose(out);
	}
	// Absolute values
	snprintf(file,sizeof(file),"range_decrease_%sabs.csv",filetag);
	out=fopen(file,"wt");
	if (!out) perror(file);
	else
	{
		fprintf(out,"|a|");
		for (n=0;n<am;n++) fprintf(out,",a[%d]",n);
		fprintf(out,",|b|");
		for (n=0;n<bm;n++) fprintf(out,",b[%d]",n);
		fprintf(out,"\n");
		for (n=0;n<m;n++)
		{
			REAL dd=0;
			for (i=0;i<am;i++) dd+=sq(aa[n][i]);
			fprintf(out,"%.16lg",sqrt(dd));
			for (i=0;i<am;i++) fprintf(out,",%.16lg",aa[n][i]);
			REAL e=FL2Problem_errornorm(ba[n],bm);
			fprintf(out,",%.16lg",e);
			for (i=0;i<bm;i++) fprintf(out,",%.16lg",ba[n][i]);
			fprintf(out,"\n");
		}
		fclose(out);
	}
}

// Samples the problem along the line a + f*c1 for 300 values of f spaced logarithmically
// from 1e-15 (row 0) up to 1 (last row), evaluates them and saves the result via
// range_decrease_save with the file tag "linetest_". Intended for spotting numerical
// steps (discontinuities) in the function at small step sizes.
void line_test(FL2Problem *p,const REAL *a,const REAL *o,const REAL *c1)
{
	int samples=300; const REAL smallest=1e-15;
	REAL **ins=array_2d(samples,p->am),**outs=array_2d(samples,p->bm);
	for (int row=samples-1;row>=0;row--)
	{
		// Exponent runs from 1 at row 0 down to 0 at the last row
		REAL scale=pow(smallest,((REAL)samples-1-row)/(samples-1));
		REAL *dst=ins[row];
		for (int k=p->am-1;k>=0;k--) dst[k]=a[k]+scale*c1[k];
	}
	FL2Problem_evals(p,ins,outs,samples);
	range_decrease_save(ins,outs,a,o,"linetest_");
	array_2d_free(ins);
	array_2d_free(outs);
}

// Picks the evaluated trial with the lowest error norm, provided it beats *pbe.
// aa/ba hold lm*mulm trial inputs/outputs, row n*mulm+i belonging to l[n] and muls[i].
// On success returns a new array_1d copy of the winning input vector and updates *pbe,
// *pbl and *pbmul to its error, lambda and step multiple. If nothing beats *pbe, returns
// NULL with *pbl=*pbmul=-1 and *pbe untouched.
REAL *FL2Problem_svd_trylambdas_best(FL2Problem *p,REAL **aa,REAL **ba,
	const REAL *l,const int lm,
	const REAL *muls,const int mulm,
	REAL *pbe,REAL *pbl,REAL *pbmul) // Pointer to lowest error, best lambda, best mul
{
	const int am=p->am;
	REAL *winner=array_1d(am);
	*pbl=-1;
	*pbmul=-1;
	for (int li=lm-1;li>=0;li--)
	{
		for (int mi=mulm-1;mi>=0;mi--)
		{
			const int row=li*mulm+mi;
			REAL err=FL2Problem_errornorm(ba[row],p->bm);
			if (!(err<*pbe)) continue; // Strict improvement only
			memcpy(winner,aa[row],am*sizeof(REAL));
			*pbe=err;
			*pbl=l[li];
			*pbmul=muls[mi];
		}
	}
	if (*pbl<0)
	{ // Sentinel still in place: nothing was better
		array_1d_free(winner);
		return NULL;
	}
	return winner;
}

// Convenience wrapper: generates and evaluates all (lambda, step multiple) trials with
// FL2Problem_svd_trylambdas_results, selects the best with FL2Problem_svd_trylambdas_best
// and releases the temporary trial arrays.
// Returns a new array_1d with the best trial input, or NULL if *pbe was not beaten.
REAL *FL2Problem_svd_trylambdas(FL2Problem *p,
	const REAL *a,const REAL *o, // Current position input/output vectors
	const REAL *s,const REAL * const *u,const REAL * const *v, // SVD of the response matrix
	const REAL *l,const int lm, // lambdas to try
	const REAL *muls,const int mulm, // step multiples to try
	REAL *pbe,REAL *pbl,REAL *pbmul) // Pointer to lowest error, best lambda, best mul
{
	REAL **trial_in,**trial_out;
	FL2Problem_svd_trylambdas_results(p, a,o, s,u,v, l,lm,muls,mulm, &trial_in,&trial_out);
	REAL *best=FL2Problem_svd_trylambdas_best(p, trial_in,trial_out, l,lm,muls,mulm, pbe,pbl,pbmul);
	array_2d_free(trial_in);
	array_2d_free(trial_out);
	return best;
}

// Same selection as FL2Problem_svd_trylambdas_best but over `many` sets of trials at once:
// aaa[j]/baa[j] each address lm*mulm rows of inputs/outputs laid out as row n*mulm+i.
// Returns a new array_1d copy of the best input (updating *pbe, *pbl, *pbmul), or NULL
// with *pbl=*pbmul=-1 if no trial in any set beats *pbe.
REAL *FL2Problem_svd_trylambdas_best_many(FL2Problem *p,REAL ***aaa,REAL ***baa,const int many,
	const REAL *l,const int lm,
	const REAL *muls,const int mulm,
	REAL *pbe,REAL *pbl,REAL *pbmul) // Pointer to lowest error, best lambda, best mul
{
	const int am=p->am;
	REAL *winner=array_1d(am);
	*pbl=-1;
	*pbmul=-1;
	for (int set=many-1;set>=0;set--)
	{
		for (int li=lm-1;li>=0;li--)
		{
			for (int mi=mulm-1;mi>=0;mi--)
			{
				const int row=li*mulm+mi;
				REAL err=FL2Problem_errornorm(baa[set][row],p->bm);
				if (!(err<*pbe)) continue; // Strict improvement only
				memcpy(winner,aaa[set][row],am*sizeof(REAL));
				*pbe=err;
				*pbl=l[li];
				*pbmul=muls[mi];
			}
		}
	}
	if (*pbl<0)
	{ // Sentinel still in place: nothing was better
		array_1d_free(winner);
		return NULL;
	}
	return winner;
}

// Proposes a set of lambda values to try around the current lambda.
// Adds 21 values lambda*10^(oom*cu(n/rm)) for n=-rm..rm (cu() concentrates them near
// lambda), and, when |log10(lambda)| exceeds oom, also powers of 10 (or of 0.1 when
// lambda<=1) heading from 1 towards lambda. The caller frees the returned LSet.
LSet FL2Problem_svd_suggestlambdas(const REAL lambda)
{
	const REAL oom=4; const int rm=10; // Orders of magnitude to span and number of points each way
	LSet ret=LSet(REAL);
	int n=-rm;
	while (n<=rm)
	{
		REAL l=lambda*pow(10,oom*cu(n/(REAL)rm));
		LSet_add(&ret,&l);
		n++;
	}
	// Check values back towards 1 if lambda getting too small or large.
	// The loop's first test (oom<|ll|) doubles as the "too small or large" check.
	REAL ll=log10(lambda);
	n=0;
	while (n+oom<fabs(ll))
	{
		REAL l=pow((lambda>1?10:0.1),n);
		LSet_add(&ret,&l);
		n++;
	}
	return ret;
}

// Tuning globals read by FL2Problem_svd.
// Period (in calls) of response matrix recomputation: FL2Problem_svd keeps a turn counter
// modulo this value and only calls FL2Problem_response when the counter is 0; on other
// calls it reuses a cached copy of the matrix. With 1 the matrix is recomputed every call.
int FL2Problem_svd_srm_period=1;
REAL lambdamul=1; // lambda multiplier for solving systems for higher-order steps
// When FL2Problem_svd_saverm is nonzero, each FL2Problem_svd call stores copies of the
// response matrix and its SVD (s,u,v) in the globals below, freeing the previous copies first.
int FL2Problem_svd_saverm=0; REAL **FL2Problem_svd_rm=NULL,**FL2Problem_svd_u=NULL,**FL2Problem_svd_v=NULL,*FL2Problem_svd_s=NULL;
// Performs one optimisation step on problem p using the SVD of its response matrix.
//   p       - problem; p->am inputs, p->bm outputs
//   a       - current input vector (p->am values); overwritten with the chosen next point
//   plambda - in/out damping parameter; on return it is the geometric mean of its old value
//             and the lambda that gave the best trial (*plambda *= sqrt(bl / *plambda))
//   smooth  - the single step multiple used for all trials, except for ver==2
//   ver     - algorithm variant:
//             <=2   scan over suggested lambdas (ver==2 also scans 21 step multiples);
//                   if nothing improves, rescan lambdas from ~1e-15 to 1e15 in 1.1x steps
//             3..6  first-order trials plus 2nd-order correction (4 = alternate method),
//                   5 and 6 add a "naive" 3rd-order point, 6 adds a 4th-order point
//             7     "proper" 3rd order
//             8, 9  "proper" 4th order (9 also samples the 3rd-order point)
//             other no trials are made and the "stuck" fallback below is used
// The response matrix is recomputed every FL2Problem_svd_srm_period calls and cached in
// between. If no trial beats the current error norm, a is left unchanged and lambda is
// perturbed by up to about +/-1%. If p->db is set, the new point is evaluated and added
// to the problem with FL2Problem_add.
void FL2Problem_svd(FL2Problem *p,REAL *a,REAL *plambda,
	const REAL smooth=1,const int ver=9) // ver=2 improves per-step performance by ~2x but much slower steps
{ // Start with lambda=1
profile("FL2Problem_svd - init");
	int n,i,m=p->am; // Modified so a can be any array (not array_1d)
profile("FL2Problem_response");
	static REAL **srm=NULL; static int srm_turn=0; // Cached response matrix and position within the recompute period
	REAL *o,**rm;
	if (srm_turn && srm) {rm=array_2d_copy(srm); o=array_1d(p->bm); FL2Problem_eval(p,a,o);}
	else {rm=FL2Problem_response(p,a,&o); if (srm) array_2d_free(srm); srm=array_2d_copy(rm);}
	srm_turn=(srm_turn+1)%FL2Problem_svd_srm_period;
	REAL eref=FL2Problem_errornorm(o,p->bm); // Error norm at the starting point; trials must beat this
#ifdef SBCLIB_GRAPHICS
if (xres) ortho_on();
//array_2d_plot(xres*2/3,0,2,rm,colmap2);
#endif
char str[300];
profile("svd");
	// bl/bmul start at the "nothing found" sentinel so they are never read uninitialised
	REAL **u,**v,*s=svd(rm,&u,&v),bl=-1,bmul=-1,be=eref;
	if (FL2Problem_svd_saverm)
	{
		if (FL2Problem_svd_rm) {array_2d_free(FL2Problem_svd_rm); svdfree(FL2Problem_svd_s,FL2Problem_svd_u,FL2Problem_svd_v);}
		FL2Problem_svd_rm=array_2d_copy(rm); FL2Problem_svd_u=array_2d_copy(u); FL2Problem_svd_v=array_2d_copy(v); FL2Problem_svd_s=array_1d_copy(s);
	}
profile("iterations with evalerrornorm");
	LSet ls=FL2Problem_svd_suggestlambdas(*plambda),muls=LSet(REAL);
	if (ver==2) for (n=-10;n<=10;n++)
	{
		REAL mul=pow(1e4,cu(0.1*n)); //exp(0.1*n);
		LSet_add(&muls,&mul);
	}
	else LSet_add(&muls,&smooth);
	// Best point found so far. Starts as NULL so that a ver outside the handled values
	// reaches the "stuck" fallback instead of testing an uninitialised pointer.
	REAL *b=NULL;
	if (ver<=2)
	{
		b=FL2Problem_svd_trylambdas(p, a,o, s,u,v,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl,&bmul);
		if (!b) // If it gets stuck, do fine scan
		{
profile("If it gets stuck, do fine scan");
			LSet_clear(&ls);
			for (REAL l=1e-15*pow(1.1,frnd(1));l<=1e15;l*=1.1) LSet_add(&ls,&l);
			b=FL2Problem_svd_trylambdas(p, a,o, s,u,v,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl,&bmul);
		}
	}
	else if (ver>=3 && ver<=6) // Second order correction for each lambda, sort of based on https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm#Geodesic_acceleration
	{ // Also "naive" 3rd and 4th order
		REAL **aa,**ba; int j;
		FL2Problem_svd_trylambdas_results(p, a,o, s,u,v,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &aa,&ba);
profile("iterations 2nd order");
		int ma=array_2d_xs(aa);
		REAL **aa2=array_2d(ma,m),**ba2=array_2d(ma,p->bm),*est=array_1d(p->bm),**c2=array_2d(ma,m);
		for (n=ma-1;n>=0;n--)
		{
			if (ver!=4)
			{
				for (i=p->bm-1;i>=0;i--)
				{
					est[i]=o[i];
					for (j=m-1;j>=0;j--) est[i]+=rm[i][j]*(aa[n][j]-a[j]);
					est[i]-=ba[n][i]; // ~ -1/2 d2f x'^2 = 1/2 df x''
				}
				REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul); // If I use 1e-15 this becomes same performance as ver=4
				memcpy(c2[n],c,m*sizeof(REAL));
			   	array_1d_free(c);
			}
			else if (ver==4)
			{ // Alternate method, seems less good than ver=3, although using small lambda for inversion helps
				for (i=p->bm-1;i>=0;i--) est[i]=o[i]-ba[n][i]; // ~ -df x' -1/2 d2f x'^2
				REAL *c=svdsolve_inner(s,u,v,est,1e-15);
				for (i=m-1;i>=0;i--) c2[n][i]=c[i]+(aa[n][i]-a[i]);
			   	array_1d_free(c);
			}
			for (i=m-1;i>=0;i--) aa2[n][i]=aa[n][i]+c2[n][i];
		}
		FL2Problem_evals(p,aa2,ba2,ma);
		REAL **aa3=NULL,**ba3,**c3,**as3,**bs3; // aa3 stays NULL unless the 3rd-order stage runs
		if (ver==5 || ver==6) // Try third order
		{
profile("iterations 3rd order");
			as3=array_2d(ma*2,m); bs3=array_2d(ma*2,p->bm); // Stencil points
			for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
			{
				as3[n][i]=0.5*a[i]+0.5*aa[n][i];
				as3[ma+n][i]=a[i]+c2[n][i];
			}
			FL2Problem_evals(p,as3,bs3,ma*2);
			REAL **f3c1c1c1=array_2d(ma,p->bm),**f2c1c2=array_2d(ma,p->bm);
			for (n=ma-1;n>=0;n--) for (i=p->bm-1;i>=0;i--)
			{
				REAL lin=0;
				for (j=m-1;j>=0;j--) lin+=rm[i][j]*(aa[n][j]-a[j]);
				REAL nl1=ba[n][i]-(o[i]+lin),nlh=bs3[n][i]-(o[i]+0.5*lin);
				f3c1c1c1[n][i]=12.0*nl1-48.0*nlh;
				f2c1c2[n][i]=ba2[n][i]-ba[n][i]-(bs3[ma+n][i]-o[i]);
			}
			aa3=array_2d(ma,m); ba3=array_2d(ma,p->bm); c3=array_2d(ma,m);
			for (n=ma-1;n>=0;n--)
			{
				for (i=p->bm-1;i>=0;i--) est[i]=-1.0/6.0*(f3c1c1c1[n][i]+3.0*2.0*f2c1c2[n][i]);
				REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
				memcpy(c3[n],c,m*sizeof(REAL));
			   	array_1d_free(c);
				for (i=m-1;i>=0;i--) aa3[n][i]=aa2[n][i]+c3[n][i];
			}
			array_2d_free(f3c1c1c1); array_2d_free(f2c1c2);
			FL2Problem_evals(p,aa3,ba3,ma);
		}
		REAL **aa4=NULL,**ba4,**c4,**as4,**bs4; // aa4 stays NULL unless the 4th-order stage runs
		if (ver==6) // Try fourth order
		{
profile("iterations 4th order");
			as4=array_2d(ma*4,m); bs4=array_2d(ma*4,p->bm); // Stencil points
			for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
			{
				as4[n][i]=-0.5*a[i]+1.5*aa[n][i];
				as4[ma+n][i]=0.5*a[i]+0.5*aa[n][i]+c2[n][i];
				as4[ma*2+n][i]=a[i]+c3[n][i];
				as4[ma*3+n][i]=aa[n][i]+c3[n][i];
			}
			FL2Problem_evals(p,as4,bs4,ma*4);
			REAL **f4c1c1c1c1=array_2d(ma,p->bm),**f3c1c1c2=array_2d(ma,p->bm),
				**f2c1c3=array_2d(ma,p->bm),**f2c2c2=array_2d(ma,p->bm);
			for (n=ma-1;n>=0;n--) for (i=p->bm-1;i>=0;i--)
			{
				REAL lin=0,linc2=0;
				for (j=m-1;j>=0;j--) {lin+=rm[i][j]*(aa[n][j]-a[j]); linc2+=rm[i][j]*c2[n][j];}
				REAL nl1=ba[n][i]-(o[i]+lin),nlh=bs3[n][i]-(o[i]+0.5*lin),
					nl15=bs4[n][i]-(o[i]+1.5*lin);
				f4c1c1c1c1[n][i]=192.0*nlh-96.0*nl1+64.0/3.0*nl15;
				f3c1c1c2[n][i]=(4.0*bs3[ma+n][i]-8.0*bs4[ma+n][i]-4.0*ba2[n][i])-
					(4.0*o[i]-8.0*bs3[n][i]+4.0*ba[n][i]);
				f2c1c3[n][i]=bs4[ma*3+n][i]-bs4[ma*2+n][i]-(ba[n][i]-o[i]);
				f2c2c2[n][i]=2.0*(bs3[ma+n][i]-(o[i]+linc2));
			}
			aa4=array_2d(ma,m); ba4=array_2d(ma,p->bm); c4=array_2d(ma,m);
			for (n=ma-1;n>=0;n--)
			{
				for (i=p->bm-1;i>=0;i--) est[i]=-1.0/24.0*(
					f4c1c1c1c1[n][i]+6.0*2.0*f3c1c1c2[n][i]+4.0*6.0*f2c1c3[n][i]+3.0*4.0*f2c2c2[n][i]);
				REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
				memcpy(c4[n],c,m*sizeof(REAL));
			   	array_1d_free(c);
				for (i=m-1;i>=0;i--) aa4[n][i]=aa3[n][i]+c4[n][i];
			}
			array_2d_free(f4c1c1c1c1); array_2d_free(f3c1c1c2); array_2d_free(f2c1c3); array_2d_free(f2c2c2);
			FL2Problem_evals(p,aa4,ba4,ma);
		}
		array_1d_free(est);
		if (ver==5 || ver==6) {array_2d_free(as3); array_2d_free(bs3);}
		if (ver==6) {array_2d_free(as4); array_2d_free(bs4);}
		// Choose best result: each later stage only replaces b if it beats the running best error be
		b=FL2Problem_svd_trylambdas_best(p, aa,ba,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl,&bmul);
		array_2d_free(aa); array_2d_free(ba);
		REAL be1=be;
		{
			REAL bl2,bmul2;
			REAL *b2=FL2Problem_svd_trylambdas_best(p, aa2,ba2,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl2,&bmul2); /**/ // could compare be values here
			if (b2) {if (b) array_1d_free(b); b=b2; bl=bl2; bmul=bmul2;}
		}
		array_2d_free(aa2); array_2d_free(ba2); array_2d_free(c2);
		REAL be2=be;
		if (aa3)
		{
			REAL bl3,bmul3;
			REAL *b3=FL2Problem_svd_trylambdas_best(p, aa3,ba3,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl3,&bmul3);
			if (b3) {if (b) array_1d_free(b); b=b3; bl=bl3; bmul=bmul3;}
			array_2d_free(aa3); array_2d_free(ba3); array_2d_free(c3);
		}
		REAL be3=be;
		if (aa4)
		{
			REAL bl4,bmul4;
			REAL *b4=FL2Problem_svd_trylambdas_best(p, aa4,ba4,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl4,&bmul4);
			if (b4) {if (b) array_1d_free(b); b=b4; bl=bl4; bmul=bmul4;}
			array_2d_free(aa4); array_2d_free(ba4); array_2d_free(c4);
		}
#ifdef SBCLIB_GRAPHICS
if (xres)
{
sprintf(str,"Rel. dec. 1o %.3lg 2o %.3lg 3o %.3lg 4o %.3lg\n(%.3lgx %.3lgx %.3lgx)",
	(eref-be1)/eref,(eref-be2)/eref,(eref-be3)/eref,(eref-be)/eref,
	(eref-be2)/(eref-be1),(eref-be3)/(eref-be1),(eref-be)/(eref-be1));
bfwrite(xres*3/4,BF_height*7,str,0xFFC000);
}
#endif
	}
	else if (ver==7) // "Proper" 3rd order
	{
		REAL **aa,**ba; int j;
		FL2Problem_svd_trylambdas_results(p, a,o, s,u,v,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &aa,&ba);
		int ma=array_2d_xs(aa);
		REAL **c1=array_2d(ma,m),**c2=array_2d(ma,m),**c3=array_2d(ma,m);
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--) c1[n][i]=aa[n][i]-a[i];
profile("iterations 3rd order stencil c1");
		REAL **as31=array_2d(ma,m),**bs31=array_2d(ma,p->bm); // Stencil point (c1)
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			as31[n][i]=0.5*a[i]+0.5*aa[n][i];
		}
		FL2Problem_evals(p,as31,bs31,ma);
		REAL **f2c1c1=array_2d(ma,p->bm),**f3c1c1c1=array_2d(ma,p->bm);
		REAL *est=array_1d(p->bm);
		for (n=ma-1;n>=0;n--)
		{
			for (i=p->bm-1;i>=0;i--)
			{
				REAL linc1=0;
				for (j=m-1;j>=0;j--) linc1+=rm[i][j]*c1[n][j];
				REAL nl1=ba[n][i]-(o[i]+linc1),nlh=bs31[n][i]-(o[i]+0.5*linc1);
				f2c1c1[n][i]=16.0*nlh-2.0*nl1; // 3rd order stencils
				f3c1c1c1[n][i]=12.0*nl1-48.0*nlh;
				est[i]=-1.0/2.0*(f2c1c1[n][i]);
			}
			REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
			memcpy(c2[n],c,m*sizeof(REAL));
		   	array_1d_free(c);
		}
profile("iterations 3rd order stencil c2");
		REAL **as32=array_2d(ma*2,m),**bs32=array_2d(ma*2,p->bm); // Stencil points (c2)
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			as32[n][i]=a[i]+c2[n][i];
			as32[ma+n][i]=aa[n][i]+c2[n][i];
		}
		FL2Problem_evals(p,as32,bs32,ma*2);
		REAL **f2c1c2=array_2d(ma,p->bm);
		for (n=ma-1;n>=0;n--)
		{
			for (i=p->bm-1;i>=0;i--)
			{
				f2c1c2[n][i]=bs32[ma+n][i]-bs32[n][i]-(ba[n][i]-o[i]); // 2nd order stencil but acting on c1c2~epsilon^3, so OK
				est[i]=-1.0/6.0*(f3c1c1c1[n][i]+3.0*2.0*f2c1c2[n][i]);
			}
			REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
			memcpy(c3[n],c,m*sizeof(REAL));
		   	array_1d_free(c);
		}
		array_2d_free(f2c1c1); array_2d_free(f3c1c1c1); array_2d_free(f2c1c2);
		array_1d_free(est);
profile("iterations 3rd order final");
		REAL **aa3=array_2d(ma,m),**ba3=array_2d(ma,p->bm);
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			aa3[n][i]=as32[ma+n][i]+c3[n][i];
		}
		array_2d_free(c1); array_2d_free(c2); array_2d_free(c3);
		FL2Problem_evals(p,aa3,ba3,ma);
		// Choose best result
		b=FL2Problem_svd_trylambdas_best(p, aa,ba,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl,&bmul);
		array_2d_free(aa); array_2d_free(ba);
		REAL be1=be;
		// The stencil evaluations are candidate points too; as32+ma addresses the second half of as32
		REAL **aaa[3]={as31,as32,as32+ma},**baa[3]={bs31,bs32,bs32+ma};
		{
			REAL bl2,bmul2;
			REAL *b2=FL2Problem_svd_trylambdas_best_many(p, aaa,baa,3,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl2,&bmul2);
			if (b2) {if (b) array_1d_free(b); b=b2; bl=bl2; bmul=bmul2;}
		}
		array_2d_free(as31); array_2d_free(bs31);
		array_2d_free(as32); array_2d_free(bs32);
		REAL be2=be;
		{
			REAL bl2,bmul2;
			REAL *b2=FL2Problem_svd_trylambdas_best(p, aa3,ba3,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl2,&bmul2);
			if (b2) {if (b) array_1d_free(b); b=b2; bl=bl2; bmul=bmul2;}
		}
		array_2d_free(aa3); array_2d_free(ba3);
#ifdef SBCLIB_GRAPHICS
if (xres)
{
sprintf(str,"Rel. dec. 1o %.3lg 2o %.3lg 3o %.3lg\n(%.3lgx %.3lgx)",
	(eref-be1)/eref,(eref-be2)/eref,(eref-be)/eref,
	(eref-be2)/(eref-be1),(eref-be)/(eref-be1));
bfwrite(xres*3/4,BF_height*7,str,0xFFC000);
}
#endif
	}
	else if (ver==8 || ver==9) // "Proper" 4th order (ver=9 also samples 3rd order point)
	{
		REAL **aa,**ba; int j;
		FL2Problem_svd_trylambdas_results(p, a,o, s,u,v,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &aa,&ba);
		int ma=array_2d_xs(aa);
		REAL **c1=array_2d(ma,m),**c2=array_2d(ma,m),**c3=array_2d(ma,m),**c4=array_2d(ma,m);
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--) c1[n][i]=aa[n][i]-a[i];
//profile("line_test");
//line_test(p,a,o,c1[ma/2]);
profile("iterations 4th order stencil c1");
		REAL **as41=array_2d(ma*2,m),**bs41=array_2d(ma*2,p->bm); // Stencil points (c1)
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			as41[n][i]=0.5*a[i]+0.5*aa[n][i];
			as41[ma+n][i]=-0.5*a[i]+1.5*aa[n][i];
		}
		FL2Problem_evals(p,as41,bs41,ma*2);
		REAL **f2c1c1=array_2d(ma,p->bm),**f3c1c1c1=array_2d(ma,p->bm),**f4c1c1c1c1=array_2d(ma,p->bm);
		REAL *est=array_1d(p->bm);
		for (n=ma-1;n>=0;n--)
		{
			for (i=p->bm-1;i>=0;i--)
			{
				REAL linc1=0;
				for (j=m-1;j>=0;j--) linc1+=rm[i][j]*c1[n][j];
				REAL nl1=ba[n][i]-(o[i]+linc1),
					nlh=bs41[n][i]-(o[i]+0.5*linc1),
					nl15=bs41[ma+n][i]-(o[i]+1.5*linc1);
				f2c1c1[n][i]=24.0*nlh-6.0*nl1+8.0/9.0*nl15;
				f3c1c1c1[n][i]=-120.0*nlh+48.0*nl1-8.0*nl15;
				f4c1c1c1c1[n][i]=192.0*nlh-96.0*nl1+64.0/3.0*nl15;
				est[i]=-1.0/2.0*(f2c1c1[n][i]);
			}
			REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
			memcpy(c2[n],c,m*sizeof(REAL));
		   	array_1d_free(c);
		}
profile("iterations 4th order stencil c2");
		REAL **as42=array_2d(ma*3,m),**bs42=array_2d(ma*3,p->bm); // Stencil points (c2)
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			as42[n][i]=a[i]+c2[n][i];
			as42[ma+n][i]=as41[n][i]+c2[n][i];
			as42[ma*2+n][i]=aa[n][i]+c2[n][i];
		}
		FL2Problem_evals(p,as42,bs42,ma*3);
		REAL **f2c1c2=array_2d(ma,p->bm),**f3c1c1c2=array_2d(ma,p->bm),**f2c2c2=array_2d(ma,p->bm);
		for (n=ma-1;n>=0;n--)
		{
			for (i=p->bm-1;i>=0;i--)
			{
				REAL linc2=0;
				for (j=m-1;j>=0;j--) linc2+=rm[i][j]*c2[n][j];
				f3c1c1c2[n][i]=(4.0*bs42[n][i]-8.0*bs42[ma+n][i]+4.0*bs42[ma*2+n][i])-
					(4.0*o[i]-8.0*bs41[n][i]+4.0*ba[n][i]);
				f2c1c2[n][i]=(-3.0*bs42[n][i]+4.0*bs42[ma+n][i]-bs42[ma*2+n][i])-
					(-3.0*o[i]+4.0*bs41[n][i]-ba[n][i]);
				f2c2c2[n][i]=2.0*(bs42[n][i]-(o[i]+linc2));
				est[i]=-1.0/6.0*(f3c1c1c1[n][i]+3.0*2.0*f2c1c2[n][i]);
			}
			REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
			memcpy(c3[n],c,m*sizeof(REAL));
		   	array_1d_free(c);
		}
profile("iterations 4th order stencil c3");
		int mas=ma*2+(ver==9)*ma; // ver=9 adds a third group of ma rows for the 3rd-order point
		REAL **as43=array_2d(mas,m),**bs43=array_2d(mas,p->bm); // Stencil points (c3)
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			as43[n][i]=a[i]+c3[n][i];
			as43[ma+n][i]=aa[n][i]+c3[n][i];
			if (ver==9) as43[ma*2+n][i]=aa[n][i]+c2[n][i]+c3[n][i];
		}
		FL2Problem_evals(p,as43,bs43,mas);
		REAL **f2c1c3=array_2d(ma,p->bm);
		for (n=ma-1;n>=0;n--)
		{
			for (i=p->bm-1;i>=0;i--)
			{
				f2c1c3[n][i]=bs43[ma+n][i]-bs43[n][i]-(ba[n][i]-o[i]); // 2nd order stencil but acting on c1c3~epsilon^4, so OK
				est[i]=-1.0/24.0*(
					f4c1c1c1c1[n][i]+6.0*2.0*f3c1c1c2[n][i]+4.0*6.0*f2c1c3[n][i]+3.0*4.0*f2c2c2[n][i]);
			}
			REAL *c=svdsolve_inner(s,u,v,est,((REAL *)ls.a)[n]*lambdamul);
			memcpy(c4[n],c,m*sizeof(REAL));
		   	array_1d_free(c);
		}
		array_2d_free(f2c1c1); array_2d_free(f3c1c1c1); array_2d_free(f4c1c1c1c1);
		array_2d_free(f2c1c2); array_2d_free(f3c1c1c2); array_2d_free(f2c2c2);
		array_2d_free(f2c1c3);
		array_1d_free(est);
profile("iterations 4th order final");
		REAL **aa4=array_2d(ma,m),**ba4=array_2d(ma,p->bm);
		for (n=ma-1;n>=0;n--) for (i=m-1;i>=0;i--)
		{
			aa4[n][i]=aa[n][i]+c2[n][i]+c3[n][i]+c4[n][i];
		}
		array_2d_free(c1); array_2d_free(c2); array_2d_free(c3); array_2d_free(c4);
		FL2Problem_evals(p,aa4,ba4,ma);
		// Choose best result
		b=FL2Problem_svd_trylambdas_best(p, aa,ba,
			(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl,&bmul);
REAL **aas[5]={as41,as42,as43,aa4,aa},**bas[5]={bs41,bs42,bs43,ba4,ba}; unsigned cs[5]={0x00C000,0x00C0C0,0x0080C0,0xC000C0,0xFFFFFF};
range_decrease_graphs(aas,bas,5,a,o,cs);
		array_2d_free(aa); array_2d_free(ba);
		REAL be1=be;
		// Every group of ma stencil rows is a candidate set; the last entry (as43+ma*2) only exists for ver=9
		REAL **aaa[8]={as41,as41+ma,as42,as42+ma,as42+ma*2,as43,as43+ma,as43+ma*2},
			**baa[8]={bs41,bs41+ma,bs42,bs42+ma,bs42+ma*2,bs43,bs43+ma,bs43+ma*2};
		{
			REAL bl2,bmul2;
			REAL *b2=FL2Problem_svd_trylambdas_best_many(p, aaa,baa,7+(ver==9),
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl2,&bmul2);
			if (b2) {if (b) array_1d_free(b); b=b2; bl=bl2; bmul=bmul2;}
		}
		array_2d_free(as41); array_2d_free(bs41);
		array_2d_free(as42); array_2d_free(bs42);
		array_2d_free(as43); array_2d_free(bs43);
		REAL bes=be;
		{
			REAL bl2,bmul2;
			REAL *b2=FL2Problem_svd_trylambdas_best(p, aa4,ba4,
				(REAL *)ls.a,ls.m, (REAL *)muls.a,muls.m, &be,&bl2,&bmul2);
			if (b2) {if (b) array_1d_free(b); b=b2; bl=bl2; bmul=bmul2;}
		}
		array_2d_free(aa4); array_2d_free(ba4);
#ifdef SBCLIB_GRAPHICS
if (xres)
{
sprintf(str,"Rel. dec. 1o %.3lg st %.3lg 4o %.3lg\n(%.3lgx %.3lgx)",
	(eref-be1)/eref,(eref-bes)/eref,(eref-be)/eref,
	(eref-bes)/(eref-be1),(eref-be)/(eref-be1));
bfwrite(xres*3/4,BF_height*7,str,0xFFC000);
}
#endif
	}
	if (!b)
	{ // Nothing beat the current error (or ver was not recognised): stay put and jitter lambda slightly
		//reportwarning("Guess I'm really stuck in FL2Problem_svd"); // This would be blocking
		b=array_1d(m); memcpy(b,a,m*sizeof(REAL)); bl=*plambda*(0.99+frnd(0.02));
	}
profile("finalisation");
	LSet_free(&ls); LSet_free(&muls);
#ifdef SBCLIB_GRAPHICS
if (xres)
{
// Singular values
int py;
for (n=0,py=BF_height*5;n<array_1d_m(s);n+=10,py+=BF_height)
{
	sprintf(str,"s[%d/%d]=%lg",n,array_1d_m(s),s[n]);
	bfwrite(xres/2,py,str,0xC0C0C0);
}
for (n=0;n<array_1d_m(s);n++)
{
	int py=BF_height*5+0.1*n*BF_height;
	putpix(xres+xres/30*log10(s[n]),py,GFX_white);
}
// Cancellation factor
{
	REAL *d=array_1d(m),dd=0,aa=0;
	for (n=0;n<m;n++) {d[n]=b[n]-a[n]; dd+=d[n]*d[n]; aa+=a[n]*a[n];}
	sprintf(str,"Step distance/|a|=%lg",sqrt(dd/aa));
	bfwrite(xres*3/4,BF_height*2,str,0xC0C0C0);
	REAL t,tt=0,lr,l1each=0,l2each=0;
	for (i=0;i<p->bm;i++)
	{
		t=0; for (n=0;n<m;n++) t+=rm[i][n]*d[n];
		tt+=t*t;
	}
	lr=sqrt(tt);
	for (n=0;n<m;n++)
	{
		tt=0; for (i=0;i<p->bm;i++) tt+=sq(rm[i][n]);
		l1each+=fabs(d[n])*sqrt(tt);
		l2each+=d[n]*d[n]*tt;
	}
	l2each=sqrt(l2each);
	sprintf(str,"lr=%lg leach(L1)=%lg\nCancellation factor(L1)=%lg\nCancellation factor(L2)=%lg",
		lr,l1each,l1each/lr,l2each/lr);
	bfwrite(xres*3/4,BF_height*3,str,0xC0C0C0);
	array_1d_free(d);
}
}
#endif
	array_2d_free(rm); svdfree(s,u,v);
	//for (n=m-1;n>=0;n--) b[n]=a[n]+0.1*(b[n]-a[n]); // True damping (not that useful if you're going around a curve)
	memcpy(a,b,m*sizeof(REAL)); array_1d_free(b);
	*plambda*=pow(bl/(*plambda),0.5); // New lambda = geometric mean of old lambda and best lambda
	if (p->db)
	{ /**/ // The graph seems messed up when doing this smooth
		FL2Problem_eval(p,a,o); /**/ // Only relevant when using db, but could this be eliminated somehow, or done by FL2Problem_svd_trylambdas?
   		FL2Problem_add(p,a,o,1); // <-- some problem (crash) with o here?
   	}
	array_1d_free(o);
#ifdef SBCLIB_GRAPHICS
if (xres)
{
profile_display(xres/2,yres*0.7,!KEY(VK_J)*PROFILE_MERGESAME+KEY(VK_K)*PROFILE_STACKED+KEY(VK_L)*PROFILE_HISTORY);
sprintf(str,"bl=%lg bmul=%lg\neref=%lg be=%lg\n(%lg relative decrease)",bl,bmul,eref,be,(eref-be)/eref);
bfwrite(xres/2,BF_height*2,str,0xC0C0C0);
ortho_off();
}
#endif
}

#include <rnd/rnd.c>

// Diagnostic: shows how the finite-difference response matrix depends on the relative
// step size. For releps from 1 down to 1e-15 (multiplied by 0.9 each time) the response
// matrix at a is recomputed with FL2Problem_response_4thorder_multithread and
//   - five randomly chosen matrix entries are appended to FL2Problem_relepstest.csv
//   - the singular values of the matrix are appended to FL2Problem_releps_sv.csv
// Both files are opened in append mode. If either cannot be opened the error is reported
// with perror and the test is abandoned (previously a NULL FILE* was written to).
void FL2Problem_relepstest(FL2Problem *p,REAL *a)
{
	int n,m=p->am;
	const int em=5; int bi[em],ai[em]; // Row/column indices of the sampled entries
	for (n=0;n<em;n++) {bi[n]=rnd(p->bm); ai[n]=rnd(m);}
	FILE *out=fopen("FL2Problem_relepstest.csv","at");
	if (!out) {perror("FL2Problem_relepstest.csv"); return;}
	fprintf(out,"releps");
	for (n=0;n<em;n++) fprintf(out,",rm[%d][%d]",bi[n],ai[n]);
	fprintf(out,"\n");
	FILE *out2=fopen("FL2Problem_releps_sv.csv","at");
	if (!out2) {perror("FL2Problem_releps_sv.csv"); fclose(out); return;}
	fprintf(out2,"releps");
	for (n=0;n<m;n++) fprintf(out2,",sv%d",n);
	fprintf(out2,"\n");
	for (REAL releps=1;releps>1e-15;releps*=0.9)
	{
		REAL **rm=FL2Problem_response_4thorder_multithread(p,a,NULL,releps);
		fprintf(out,"%.16lg",releps);
		for (n=0;n<em;n++) fprintf(out,",%.16lg",rm[bi[n]][ai[n]]);
		fprintf(out,"\n");
		REAL **u,**v,*s=svd(rm,&u,&v);
		fprintf(out2,"%.16lg",releps);
		for (n=0;n<array_1d_m(s);n++) fprintf(out2,",%.16lg",s[n]);
		fprintf(out2,"\n");
		svdfree(s,u,v);
		array_2d_free(rm);
	}
	fclose(out); fclose(out2);
}

// Diagnostic: writes three versions of the response matrix at a to FL2Problem_fresptest.csv
// so they can be compared: the one from FL2Problem_response, the one from
// FL2Problem_response_4thorder_multithread with its default step, and the latter again
// with a step argument of 1e-6.
// If the file cannot be opened the error is reported with perror; the matrices are freed
// either way (previously a NULL FILE* was written to).
void FL2Problem_fresptest(FL2Problem *p,REAL *a)
{
	REAL **rm_fresp=FL2Problem_response(p,a),
		**rm_fd=FL2Problem_response_4thorder_multithread(p,a),
		**rm_fd10=FL2Problem_response_4thorder_multithread(p,a,NULL,1e-6);
	FILE *out=fopen("FL2Problem_fresptest.csv","wt");
	if (!out) perror("FL2Problem_fresptest.csv");
	else
	{
		fprintf(out,"fresp:\n");
		array_2d_writeCSV(rm_fresp,out);
		fprintf(out,"\nFinite difference:\n");
		array_2d_writeCSV(rm_fd,out);
		fprintf(out,"\nFinite difference 10x step:\n");
		array_2d_writeCSV(rm_fd10,out);
		fclose(out);
	}
	array_2d_free(rm_fresp); array_2d_free(rm_fd); array_2d_free(rm_fd10);
}

#define FL2C0_optimsvd
#endif
