#ifndef FL2C0_problem
#include <array_1d.c>
#include <array_2d.c>

struct FL2Problem_tag;

// Callback signatures used by FL2Problem.  Parameter names mirror how the callbacks are invoked
// in this file; the pointers can be plain C arrays, not necessarily array_1d/array_2d.

// Single evaluation: reads am inputs from a, writes bm outputs to b; data is the user pointer.
typedef void (*FL2Problem_fn)(const REAL *a,const int am,REAL *b,const int bm,void *data);

// Batch evaluation: m input rows a[0..m-1] (am values each) to m output rows b[0..m-1] (bm values each).
typedef void (*FL2ProblemEvals_fn)(const REAL * const *a,const int am,REAL **b,const int bm,
	const int m,void *data);

// Response (derivative) evaluation at a.  Returns an array_2d and an optional array_1d via po.
typedef REAL **(*FL2Response_fn)(struct FL2Problem_tag *p,const REAL *a,REAL **po);

// Database hook: called with the dbdata pointer, the problem, an input/output pair and a priority.
typedef void (*FL2Database_fn)(void *dbdata,struct FL2Problem_tag *p,const REAL *a,const REAL *b,
	const int priority);

// Description of a problem: an evaluation function taking am inputs to bm outputs, plus
// optional batch/response callbacks and an optional database hook.
typedef struct FL2Problem_tag
{
	FL2Problem_fn f;       // Single-point evaluation function
	FL2ProblemEvals_fn fs; // Batch evaluation; can be NULL (uses CPU multithreading)
	FL2Response_fn fresp;  // Response function; can be NULL (uses finite differences)
	int am;                // Length of the input vector passed to f
	int bm;                // Length of the output vector written by f
	void *data;            // Passed through to f/fs; can be NULL (if f requires no data)
	FL2Database_fn db;     // Database hook; can be NULL (no data logging)
	void *dbdata;          // Passed through to db; can be NULL (if db is NULL)
} FL2Problem;

FL2Problem FL2Problem_new(const FL2Problem_fn f,const int am,const int bm,void * const data=NULL)
{ // Builds a problem by value with only the mandatory parts filled in
	FL2Problem prob;
	// Caller-supplied parts
	prob.f=f;
	prob.am=am;
	prob.bm=bm;
	prob.data=data;
	// Optional callbacks start out disabled
	prob.fs=NULL;
	prob.fresp=NULL;
	prob.db=NULL;
	prob.dbdata=NULL;
	return prob;
}

void FL2Problem_database(FL2Problem *p,const FL2Database_fn db,void * const dbdata=NULL)
{ // Installs (or, with db==NULL, removes) the database hook and its user pointer
	p->dbdata=dbdata;
	p->db=db;
}

void FL2Problem_add(FL2Problem *p,const REAL *a,const REAL *b,const int priority)
{ // Forwards one input/output pair to the database hook; does nothing when no hook is set
	if (!p->db) return;
	p->db(p->dbdata,p,a,b,priority);
}

void FL2Problem_eval(FL2Problem *p,const REAL *a,REAL *b)
{ // Evaluates the problem at a (p->am values) into b (p->bm values), then logs the pair
	const FL2Problem_fn evaluate=p->f;
	evaluate(a,p->am,b,p->bm,p->data);
	FL2Problem_add(p,a,b,0); // Logged with priority 0
}

#include <parallel.c>

// Argument bundle handed to FL2Problem_evals_thread through ParallelInfo::args
typedef struct
{
	FL2Problem *p;          // Problem whose f is evaluated
	const REAL * const *a;  // Input rows
	REAL **b;               // Output rows
} FL2Problem_evals_args;

unsigned FL2Problem_evals_thread(ParallelInfo *pa)
{ // Worker body: evaluates p->f on rows pa->n0 <= row < pa->n1; always returns 0
	FL2Problem_evals_args *job=(FL2Problem_evals_args *)pa->args;
	FL2Problem *prob=job->p;
	const REAL * const *in=job->a;
	REAL **out=job->b;
	int row=pa->n0;
	while (row<pa->n1)
	{
		prob->f(in[row],prob->am,out[row],prob->bm,prob->data); // No database logging here
		row++;
	}
	return 0;
}

void FL2Problem_evals_multithread(FL2Problem *p,const REAL * const *a,REAL **b,const int m)
{ // a,b can still be 2D C arrays not array_2d
	FL2Problem_evals_args job={p,a,b};
	// Hand rows 0..m-1 to parallel() (presumably split across worker threads -- see parallel.c)
	parallel(FL2Problem_evals_thread,0,m,&job);
	// Single-threaded equivalent for debugging:
	//for (int n=0;n<m;n++) p->f(a[n],p->am,b[n],p->bm,p->data);
	int row=0;
	while (row<m)
	{
		FL2Problem_add(p,a[row],b[row],0); // Database operation not multithreaded
		row++;
	}
}

// When nonzero, batch evaluations run one at a time through FL2Problem_eval instead of parallel()
int FL2Problem_singlethread=0;

void FL2Problem_evals(FL2Problem *p,const REAL * const *a,REAL **b,const int m)
{ // Evaluates m input rows a[0..m-1] into output rows b[0..m-1] and logs every pair to the database
	if (p->fs)
	{
		// User-supplied batch evaluator takes precedence over both built-in paths
		p->fs(a,p->am,b,p->bm,m,p->data);
		// fs receives neither p nor the database hook, so log its results here; without this
		// the evaluations would be missing from the database, unlike the two paths below
		for (int n=0;n<m;n++) FL2Problem_add(p,a[n],b[n],0);
	}
	else if (FL2Problem_singlethread)
	{
		// FL2Problem_eval logs each pair itself; rows are processed last to first
		for (int n=m-1;n>=0;n--) FL2Problem_eval(p,a[n],b[n]);
	}
	else FL2Problem_evals_multithread(p,a,b,m); // Logs after the parallel section
}

REAL **FL2Problem_response_asym_singlethread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-6)
{ // Faster but asymmetric (0.5*eps error in deriv eval position)
	// Returns a new array_2d ret[bm][am] with ret[i][n]=(f(a+eps*e_n)[i]-f(a)[i])/eps.
	// If po is non-NULL, *po receives the array_1d holding f(a) (not freed here).
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	// Unperturbed output, evaluated first
	REAL *base=array_1d(nb);
	FL2Problem_eval(p,a,base);

	REAL *probe=array_1d(na);
	REAL *shifted=array_1d(nb);
	memcpy(probe,a,na*sizeof(REAL));
	REAL **jac=array_2d(nb,na);
	for (int col=na-1;col>=0;col--)
	{
		probe[col]+=eps;
		FL2Problem_eval(p,probe,shifted);
		for (int row=0;row<nb;row++) jac[row][col]=(shifted[row]-base[row])/eps;
		probe[col]=a[col]; // Restore before perturbing the next input
	}
	array_1d_free(probe);
	array_1d_free(shifted);
	if (po) *po=base;
	else array_1d_free(base);
	return jac;
}

REAL **FL2Problem_response_asym_multithread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-6)
{ // Faster but asymmetric (0.5*eps error in deriv eval position)
	// Same result layout as the single-threaded variant, but all 1+am points are
	// evaluated in one FL2Problem_evals batch.  If po is non-NULL, *po receives a new
	// array_1d copy of f(a).
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	// Row 0 is the unperturbed point, row 1+col has input col shifted by +eps
	const int count=1+na;
	REAL **inputs=array_2d(count,na);
	REAL **outputs=array_2d(count,nb);
	memcpy(inputs[0],a,na*sizeof(REAL));
	for (int col=0;col<na;col++)
	{
		REAL *row=inputs[1+col];
		memcpy(row,a,na*sizeof(REAL));
		row[col]+=eps;
	}
	FL2Problem_evals(p,inputs,outputs,count);

	const REAL *base=outputs[0];
	REAL **jac=array_2d(nb,na);
	for (int col=0;col<na;col++)
	{
		const REAL *shifted=outputs[1+col];
		for (int row=0;row<nb;row++) jac[row][col]=(shifted[row]-base[row])/eps;
	}
	if (po)
	{
		// Copy out before the batch storage is released
		*po=array_1d(nb);
		memcpy(*po,base,nb*sizeof(REAL));
	}
	array_2d_free(inputs);
	array_2d_free(outputs);
	return jac;
}

REAL **FL2Problem_response_sym_singlethread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-6)
{ // Symmetric version but 2x slower than _asym
	// Returns a new array_2d ret[bm][am] with ret[i][n]=(f(a+eps*e_n)[i]-f(a-eps*e_n)[i])/(2*eps).
	// If po is non-NULL, f(a) is evaluated afterwards into a new array_1d stored in *po.
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	REAL *probe=array_1d(na);
	REAL *plus=array_1d(nb);
	REAL *minus=array_1d(nb);
	memcpy(probe,a,na*sizeof(REAL));
	REAL **jac=array_2d(nb,na);
	for (int col=na-1;col>=0;col--)
	{
		probe[col]=a[col]+eps;
		FL2Problem_eval(p,probe,plus);
		probe[col]=a[col]-eps;
		FL2Problem_eval(p,probe,minus);
		for (int row=0;row<nb;row++) jac[row][col]=(plus[row]-minus[row])/(eps+eps);
		probe[col]=a[col]; // Restore before perturbing the next input
	}
	array_1d_free(probe);
	array_1d_free(plus);
	array_1d_free(minus);
	if (po)
	{
		// The central difference never needs f(a), so it costs one extra evaluation
		*po=array_1d(nb);
		FL2Problem_eval(p,a,*po);
	}
	return jac;
}

REAL **FL2Problem_response_sym_multithread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-6)
{ // Symmetric version but 2x slower than _asym
	// Central differences with all points evaluated in one FL2Problem_evals batch.
	// If po is non-NULL, the unperturbed point is appended to the batch and *po receives
	// a new array_1d copy of f(a).
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	// Rows 2*col and 2*col+1 hold the +eps and -eps points; the optional last row is a itself
	const int pairs=na*2;
	const int count=po?pairs+1:pairs;
	REAL **inputs=array_2d(count,na);
	REAL **outputs=array_2d(count,nb);
	for (int col=0;col<na;col++)
	{
		REAL *up=inputs[col*2];
		REAL *down=inputs[col*2+1];
		memcpy(up,a,na*sizeof(REAL));
		up[col]+=eps;
		memcpy(down,a,na*sizeof(REAL));
		down[col]-=eps;
	}
	if (po) memcpy(inputs[pairs],a,na*sizeof(REAL));
	FL2Problem_evals(p,inputs,outputs,count);

	REAL **jac=array_2d(nb,na);
	for (int col=0;col<na;col++)
	{
		const REAL *plus=outputs[col*2];
		const REAL *minus=outputs[col*2+1];
		for (int row=0;row<nb;row++) jac[row][col]=(plus[row]-minus[row])/(eps+eps);
	}
	if (po)
	{
		// Copy out before the batch storage is released
		*po=array_1d(nb);
		memcpy(*po,outputs[pairs],nb*sizeof(REAL));
	}
	array_2d_free(inputs);
	array_2d_free(outputs);
	return jac;
}

REAL **FL2Problem_response_4thorder_singlethread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-7)
{ // 4th order accurate, 4x slower than _asym (untested)
	// Five-point stencil per input:
	//   ret[i][n]=(f(a-2h)-f(a+2h)+8*(f(a+h)-f(a-h)))[i]/(12*h), with h=eps along input n.
	// If po is non-NULL, f(a) is evaluated afterwards into a new array_1d stored in *po.
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	REAL *probe=array_1d(na);
	REAL *plus2=array_1d(nb);
	REAL *plus1=array_1d(nb);
	REAL *minus1=array_1d(nb);
	REAL *minus2=array_1d(nb);
	memcpy(probe,a,na*sizeof(REAL));
	REAL **jac=array_2d(nb,na);
	for (int col=na-1;col>=0;col--)
	{
		probe[col]=a[col]+eps*2;
		FL2Problem_eval(p,probe,plus2);
		probe[col]=a[col]+eps;
		FL2Problem_eval(p,probe,plus1);
		probe[col]=a[col]-eps;
		FL2Problem_eval(p,probe,minus1);
		probe[col]=a[col]-eps*2;
		FL2Problem_eval(p,probe,minus2);
		for (int row=0;row<nb;row++)
			jac[row][col]=(minus2[row]-plus2[row]+(plus1[row]-minus1[row])*8)/(eps*12);
		probe[col]=a[col]; // Restore before perturbing the next input
	}
	array_1d_free(probe);
	array_1d_free(plus2);
	array_1d_free(plus1);
	array_1d_free(minus1);
	array_1d_free(minus2);
	if (po)
	{
		// The stencil never needs f(a), so it costs one extra evaluation
		*po=array_1d(nb);
		FL2Problem_eval(p,a,*po);
	}
	return jac;
}

REAL **FL2Problem_response_4thorder_multithread(FL2Problem *p,const REAL *a,REAL **po=NULL,
	const REAL releps=1e-7)
{ // 4th order accurate, 4x slower than _asym (untested)
	// Five-point stencil with all points evaluated in one FL2Problem_evals batch.
	// If po is non-NULL, the unperturbed point is appended to the batch and *po receives
	// a new array_1d copy of f(a).
	const int na=p->am,nb=p->bm;

	// Step: releps times the largest |a[n]|, with the magnitude floored at 1
	REAL eps=1;
	int k=na;
	while (k-->0) eps=Max(eps,fabs(a[k]));
	eps*=releps;

	// Rows 4*col..4*col+3 hold the +2eps, +eps, -eps, -2eps points; optional last row is a
	const int stencil=na*4;
	const int count=po?stencil+1:stencil;
	REAL **inputs=array_2d(count,na);
	REAL **outputs=array_2d(count,nb);
	for (int col=0;col<na;col++)
	{
		REAL **quad=inputs+col*4;
		memcpy(quad[0],a,na*sizeof(REAL));
		quad[0][col]+=eps*2;
		memcpy(quad[1],a,na*sizeof(REAL));
		quad[1][col]+=eps;
		memcpy(quad[2],a,na*sizeof(REAL));
		quad[2][col]-=eps;
		memcpy(quad[3],a,na*sizeof(REAL));
		quad[3][col]-=eps*2;
	}
	if (po) memcpy(inputs[stencil],a,na*sizeof(REAL));
	FL2Problem_evals(p,inputs,outputs,count);

	REAL **jac=array_2d(nb,na);
	for (int col=0;col<na;col++)
	{
		const REAL *plus2=outputs[col*4];
		const REAL *plus1=outputs[col*4+1];
		const REAL *minus1=outputs[col*4+2];
		const REAL *minus2=outputs[col*4+3];
		for (int row=0;row<nb;row++)
			jac[row][col]=(minus2[row]-plus2[row]+(plus1[row]-minus1[row])*8)/(eps*12);
	}
	if (po)
	{
		// Copy out before the batch storage is released
		*po=array_1d(nb);
		memcpy(*po,outputs[stencil],nb*sizeof(REAL));
	}
	array_2d_free(inputs);
	array_2d_free(outputs);
	return jac;
}

// Nonzero selects the 4th-order finite-difference response; zero selects the symmetric one
int FL2Problem_resp4thorder=1;

REAL **FL2Problem_response(FL2Problem *p,const REAL *a,REAL **po=NULL)
{ // Response at a: uses p->fresp when provided, otherwise a finite-difference fallback
	if (p->fresp) return p->fresp(p,a,po);

	// Fallback chosen by the two global switches
	if (FL2Problem_resp4thorder)
	{
		return FL2Problem_singlethread
			? FL2Problem_response_4thorder_singlethread(p,a,po)
			: FL2Problem_response_4thorder_multithread(p,a,po);
	}
	return FL2Problem_singlethread
		? FL2Problem_response_sym_singlethread(p,a,po)
		: FL2Problem_response_sym_multithread(p,a,po);
}

REAL FL2Problem_errornorm(const REAL *b,const int bm)
{ // Square root of the sum of squares of b[0..bm-1]; 0 when bm<=0
	REAL sumsq=0;
	int k=bm;
	while (k-->0) sumsq+=b[k]*b[k]; // Accumulated from the last element down to the first
	return sqrt(sumsq);
}

REAL FL2Problem_evalerrornorm(FL2Problem *p,const REAL *a,REAL *scratch)
{ // scratch=REAL[p->bm]
	// Evaluates the problem at a into scratch and returns the norm of that output
	FL2Problem_eval(p,a,scratch);
	const REAL norm=FL2Problem_errornorm(scratch,p->bm);
	return norm;
}

REAL FL2Problem_evalerrornormslow(FL2Problem *p,const REAL *a)
{ // Same as FL2Problem_evalerrornorm but allocates and frees its own output buffer per call
	REAL *out=array_1d(p->bm);
	const REAL norm=FL2Problem_evalerrornorm(p,a,out);
	array_1d_free(out);
	return norm;
}

#define FL2C0_problem
#endif
