/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

// Set PPH_DEBUG to a non-zero value to compile in the debug output that the
// PPH_DUMP() invocations in this file produce.
#define PPH_DEBUG 0

// PPH_DUMP(q): when PPH_DEBUG is non-zero the statements q are executed
// (wrapped in a do/while(0) so the macro behaves as a single statement);
// otherwise the macro expands to nothing.
#if PPH_DEBUG
#define PPH_DUMP(q) do { q } while (0)
#else
#define PPH_DUMP(q)
#endif

#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

// Pass entry point: applies the peephole rewrites to everything reachable
// from the shader's root container. Always returns 0.
int peephole::run() {
	run_on(sh.root);
	return 0;
}

// Visits the children of container c in forward order. Nested containers
// are processed recursively; every ALU instruction is offered to one of the
// peephole rewrites:
//  - ops flagged PRED/SET/CMOV/KILL go through optimize_cc_op(),
//  - FLT_TO_INT whose input is recognised by get_bool_flt_to_int_source()
//    is replaced via convert_float_setcc().
void peephole::run_on(container_node* c) {

	for (node_iterator it = c->begin(), end = c->end(); it != end; ++it) {
		node *cur = *it;

		if (cur->is_container()) {
			run_on(static_cast<container_node*>(cur));
			continue;
		}

		if (!cur->is_alu_inst())
			continue;

		alu_node *alu = static_cast<alu_node*>(cur);
		const unsigned op_flags = alu->bc.op_ptr->flags;

		if (op_flags & (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
			optimize_cc_op(alu);
			continue;
		}

		if (alu->bc.op != ALU_OP1_FLT_TO_INT)
			continue;

		// the helper takes the node by reference and, on success, replaces
		// it with the SETcc instruction that feeds the conversion
		alu_node *setcc = alu;
		if (get_bool_flt_to_int_source(setcc))
			convert_float_setcc(alu, setcc);
	}
}

// Dispatches a conditional ALU op to the matching optimizer: ops flagged
// PRED, SET or KILL are handled by optimize_cc_op2(); otherwise ops flagged
// CMOV are handled by optimize_CNDcc_op(). Anything else is left untouched.
void peephole::optimize_cc_op(alu_node* a) {
	const unsigned op_flags = a->bc.op_ptr->flags;

	if (op_flags & (AF_PRED | AF_SET | AF_KILL)) {
		optimize_cc_op2(a);
		return;
	}

	if (op_flags & AF_CMOV)
		optimize_CNDcc_op(a);
}

// Replaces the FLT_TO_INT instruction f2i with a clone of the SETcc
// instruction s. The clone writes f2i's destination and has its opcode
// shifted by the distance between SETE and SETE_DX10.
// NOTE(review): this presumably relies on s being a plain float SETcc whose
// DX10 counterpart sits at that fixed opcode offset - confirm against the
// opcode table.
void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
	alu_node *dx10_set = sh.clone(s);

	// the clone takes over the destination value of the conversion and
	// becomes its defining instruction
	dx10_set->dst[0] = f2i->dst[0];
	dx10_set->dst[0]->def = dx10_set;

	dx10_set->bc.set_op(dx10_set->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));

	// put the clone right behind the conversion, then drop the conversion
	f2i->insert_after(dx10_set);
	f2i->remove();
}

// Handles an unpredicated PRED/SET/KILL op that tests a value for
// (in)equality with literal zero. When the tested value is a boolean
// produced by another SETcc (see get_bool_op_info()), 'a' is rewritten to
// perform that comparison directly on the SETcc's own operands, with the
// condition inverted when 'a' was an "== 0" test.
void peephole::optimize_cc_op2(alu_node* a) {

	const unsigned flags = a->bc.op_ptr->flags;
	unsigned cc = flags & AF_CC_MASK;

	// only unpredicated E / NE tests are candidates
	if (a->pred || (cc != AF_CC_E && cc != AF_CC_NE))
		return;

	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
	const unsigned dst_type = flags & AF_DST_TYPE_MASK;

	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
		// zero is the first operand: move it into the second slot so the
		// tested value is always src[0]
		std::swap(a->src[0], a->src[1]);
		// clear modifiers
		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
	} else if (!a->src[1]->is_const() ||
			!(a->src[1]->literal_value == literal(0))) {
		// neither operand is literal zero - nothing to do
		return;
	}

	bool_op_info bop = {};

	PPH_DUMP(
		sblog << "cc_op2: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	if (!get_bool_op_info(a->src[0], bop))
		return;

	// "b == 0" is the negation of boolean b
	if (cc == AF_CC_E)
		bop.invert = !bop.invert;

	alu_node *def = bop.n;
	bool swap_args = false;

	// from here on cc is the condition of the defining SETcc
	cc = def->bc.op_ptr->flags & AF_CC_MASK;

	if (bop.invert)
		cc = invert_setcc_condition(cc, swap_args);

	if (bop.int_cvt) {
		assert(cmp_type != AF_FLOAT_CMP);
		cmp_type = AF_FLOAT_CMP;
	}

	PPH_DUMP(
		sblog << "boi node: ";
		dump::dump_op(bop.n);
		sblog << " invert: " << bop.invert << "  int_cvt: " << bop.int_cvt;
		sblog <<"\n";
	);

	// keep the kind of 'a' (pred / set / kill) but use the new condition
	unsigned newop;

	if (flags & AF_PRED) {
		newop = get_predsetcc_op(cc, cmp_type);
	} else if (flags & AF_SET) {
		newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
	} else if (flags & AF_KILL) {
		newop = get_killcc_op(cc, cmp_type);
	} else {
		newop = ALU_OP0_NOP;
		assert(!"invalid op kind");
	}

	a->bc.set_op(newop);

	// take over the operands (values and bytecode source descriptors) of
	// the defining SETcc, exchanged when swap_args was set
	const int lhs = swap_args ? 1 : 0;
	const int rhs = swap_args ? 0 : 1;

	a->src[0] = def->src[lhs];
	a->src[1] = def->src[rhs];
	a->bc.src[0] = def->bc.src[lhs];
	a->bc.src[1] = def->bc.src[rhs];
}

// Handles a CNDE / CNDNE style conditional move whose condition operand is
// a boolean produced by a SETcc that compares some value against literal
// zero. In that case the CNDcc is rewritten to test that value directly,
// exchanging its two selectable operands whenever the sense of the test
// flips.
void peephole::optimize_CNDcc_op(alu_node* a) {
	const unsigned flags = a->bc.op_ptr->flags;
	const unsigned cc = flags & AF_CC_MASK;
	const unsigned cmp_type = flags & AF_CMP_TYPE_MASK;

	if (cc != AF_CC_E && cc != AF_CC_NE)
		return;

	// an E test is treated as an NE test with the selected operands swapped
	bool swap = (cc == AF_CC_E);

	bool_op_info bop = {};

	PPH_DUMP(
		sblog << "cndcc: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	if (!get_bool_op_info(a->src[0], bop))
		return;

	alu_node *d = bop.n;

	if (d->bc.omod)
		return;

	PPH_DUMP(
		sblog << "cndcc def: ";
		dump::dump_op(d);
		sblog << "\n";
	);

	const unsigned dflags = d->bc.op_ptr->flags;
	unsigned dcc = dflags & AF_CC_MASK;
	const unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
	const unsigned ddst_type = dflags & AF_DST_TYPE_MASK;

	// TODO we can handle some of these cases,
	// though probably this shouldn't happen
	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
		return;

	// TODO we can handle some cases for uint comparison
	if (dcmp_type == AF_UINT_CMP)
		return;

	// nds is the index of the operand of d that is NOT the literal zero
	int nds;

	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0)) {
		nds = 1;
	} else if (d->src[1]->is_const() &&
			d->src[1]->literal_value == literal(0)) {
		nds = 0;
	} else {
		return;
	}

	// can't propagate ABS modifier to CNDcc because it's OP3
	if (d->bc.src[nds].abs)
		return;

	// express NE as E with the selected operands exchanged
	if (dcc == AF_CC_NE) {
		dcc = AF_CC_E;
		swap = !swap;
	}

	// zero was the first operand of d ("0 cc x"): turn the test around so
	// that it reads "x cc' 0", which exchanges GT and GE and flips the
	// selection
	if (nds == 1) {
		if (dcc == AF_CC_GT) {
			dcc = AF_CC_GE;
			swap = !swap;
		} else if (dcc == AF_CC_GE) {
			dcc = AF_CC_GT;
			swap = !swap;
		}
	}

	// test the compared value itself instead of the boolean
	a->src[0] = d->src[nds];
	a->bc.src[0] = d->bc.src[nds];

	if (swap) {
		std::swap(a->src[1], a->src[2]);
		std::swap(a->bc.src[1], a->bc.src[2]);
	}

	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
}

// Checks whether 'a' is a FLT_TO_INT whose input comes from a SETcc
// instruction, either directly or through a TRUNC that negates its source.
//
// On success 'a' is replaced with the defining SETcc and true is returned;
// otherwise 'a' is left unchanged and false is returned.
bool peephole::get_bool_flt_to_int_source(alu_node* &a) {

	if (a->bc.op != ALU_OP1_FLT_TO_INT)
		return false;

	// the conversion itself must not carry any source modifiers
	if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
		return false;

	value *in = a->src[0];
	if (!in || !in->def || !in->def->is_alu_inst())
		return false;

	alu_node *producer = static_cast<alu_node*>(in->def);

	if (producer->is_alu_op(ALU_OP1_TRUNC)) {
		// look through the TRUNC to whatever defines its input
		in = producer->src[0];
		if (!in || !in->def || !in->def->is_alu_inst())
			return false;

		// the TRUNC is only accepted with neg set and abs/rel clear
		if (producer->bc.src[0].neg != 1 || producer->bc.src[0].abs != 0 ||
				producer->bc.src[0].rel != 0) {
			return false;
		}

		producer = static_cast<alu_node*>(in->def);
	}

	if (!(producer->bc.op_ptr->flags & AF_SET))
		return false;

	a = producer;
	return true;
}

// Determines whether value b is a boolean produced by a SETcc instruction
// and, if so, describes that instruction in bop.
//
// b    value to inspect; a null pointer is treated as "not a boolean"
// bop  on success, bop.n is set to the defining SETcc and bop.int_cvt is
//      set to true when the SETcc is flagged DX10 or when it was reached
//      through a FLT_TO_INT conversion
//      (see get_bool_flt_to_int_source()); other fields are not touched
//
// Returns true when a defining SETcc was found, false otherwise.
bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {

	// guard against a missing value, mirroring the source checks done in
	// get_bool_flt_to_int_source()
	if (!b)
		return false;

	node *d = b->def;

	if (!d || !d->is_alu_inst())
		return false;

	alu_node *dn = static_cast<alu_node*>(d);

	// directly defined by a SETcc
	if (dn->bc.op_ptr->flags & AF_SET) {
		bop.n = dn;

		if (dn->bc.op_ptr->flags & AF_DX10)
			bop.int_cvt = true;

		return true;
	}

	// defined by a FLT_TO_INT of a SETcc result; on success the helper
	// replaces dn with that SETcc
	if (get_bool_flt_to_int_source(dn)) {
		bop.n = dn;
		bop.int_cvt = true;
		return true;
	}

	return false;
}

} // namespace r600_sb