/*
	This file is part of cpp-ethereum.

	cpp-ethereum is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	cpp-ethereum is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * @file ExpressionClasses.h
 * @author Christian <c@ethdev.com>
 * @date 2015
 * Container for equivalence classes of expressions for use in common subexpression elimination.
 */

#pragma once

#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <libdevcore/Common.h>
#include <libevmasm/AssemblyItem.h>

namespace dev
{
namespace eth
{

class Pattern;
struct ExpressionTemplate;

/**
 * Collection of classes of equivalent expressions that can also determine the class of an expression.
 * Identifiers are contiguously assigned to new classes starting from zero.
 */
class ExpressionClasses
{
public:
	/// Identifier of an expression equivalence class. Id(-1) is used as the "no class" marker
	/// (see tryToSimplify).
	using Id = unsigned;
	using Ids = std::vector<Id>;

	/// A single expression: an assembly item applied to a list of argument classes.
	struct Expression
	{
		/// Class this expression belongs to; Id(-1) until it has been assigned.
		Id id = Id(-1);
		/// Non-owning pointer to the item that forms the root of the expression.
		AssemblyItem const* item = nullptr;
		/// Classes of the arguments the item is applied to.
		Ids arguments;
		/// Storage modification sequence, only used for storage and memory operations.
		unsigned sequenceNumber = 0;
		/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
		/// Note that @a id does not take part in the comparison.
		bool operator<(Expression const& _other) const;
	};

	/// Retrieves the id of the expression equivalence class resulting from the given item applied to the
	/// given classes, might also create a new one.
	/// @param _item the assembly item at the root of the expression.
	/// @param _arguments the classes of the arguments of @a _item.
	/// @param _copyItem if true, copies the assembly item to an internal storage instead of just
	/// keeping a pointer.
	/// @param _sequenceNumber indicates the current storage or memory access sequence.
	Id find(
		AssemblyItem const& _item,
		Ids const& _arguments = {},
		bool _copyItem = true,
		unsigned _sequenceNumber = 0
	);
	/// @returns the canonical representative of an expression class.
	/// Uses bounds-checked access, so an unknown @a _id results in std::out_of_range.
	Expression const& representative(Id _id) const { return m_representatives.at(_id); }
	/// @returns the number of classes.
	/// The container size is converted explicitly since Id is narrower than size_t on common platforms.
	Id size() const { return static_cast<Id>(m_representatives.size()); }

	/// Forces the given @a _item with @a _arguments to the class @a _id. This can be used to
	/// add prior knowledge e.g. about CALLDATA, but has to be used with caution. Will not work as
	/// expected if @a _item applied to @a _arguments already exists.
	void forceEqual(Id _id, AssemblyItem const& _item, Ids const& _arguments, bool _copyItem = true);

	/// @returns the id of a new class which is different to all other classes.
	Id newClass(SourceLocation const& _location);

	/// @returns true if the values of the given classes are known to be different (on every input).
	/// @note that this function might still return false for some different inputs.
	bool knownToBeDifferent(Id _a, Id _b);
	/// Similar to @a knownToBeDifferent but requires that abs(_a - _b) >= 32.
	bool knownToBeDifferentBy32(Id _a, Id _b);
	/// @returns true if the value of the given class is known to be zero.
	/// @note that this is not the negation of knownNonZero
	bool knownZero(Id _c);
	/// @returns true if the value of the given class is known to be nonzero.
	/// @note that this is not the negation of knownZero
	bool knownNonZero(Id _c);
	/// @returns a pointer to the value if the given class is known to be a constant,
	/// and a nullptr otherwise.
	u256 const* knownConstant(Id _c);

	/// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for
	/// the lifetime of the ExpressionClasses object.
	AssemblyItem const* storeItem(AssemblyItem const& _item);

	/// @returns a textual representation of the expression class @a _id.
	/// NOTE(review): judging by the name this includes the whole argument DAG - confirm against the definition.
	std::string fullDAGToString(Id _id) const;

private:
	/// Tries to simplify the given expression.
	/// @returns its class if it is possible or Id(-1) otherwise.
	/// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed
	Id tryToSimplify(Expression const& _expr, bool _secondRun = false);

	/// Rebuilds an expression from a (matched) pattern.
	Id rebuildExpression(ExpressionTemplate const& _template);

	/// @returns the list of rules, each a pair of a pattern to match and a function producing a pattern.
	/// NOTE(review): presumably the produced pattern is the simplified replacement - confirm against the definition.
	std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const;

	/// Expression equivalence class representatives - we only store one item of an equivalence.
	std::vector<Expression> m_representatives;
	/// All expressions ever encountered.
	std::set<Expression> m_expressions;
	/// Owned assembly items; presumably the backing storage for storeItem (TODO confirm).
	std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems;
};

/**
 * Pattern to match against an expression.
 * Also stores matched expressions to retrieve them later, for constructing new expressions using
 * ExpressionTemplate.
 */
class Pattern
{
public:
	using Expression = ExpressionClasses::Expression;
	using Id = ExpressionClasses::Id;

	/// Matches a specific constant value. Delegates to the u256 constructor.
	/// Intentionally not explicit; NOTE(review): presumably so that rules can be written with plain
	/// literals - confirm against createRules.
	Pattern(unsigned _value): Pattern(u256(_value)) {}
	/// Matches a specific constant value, i.e. a Push item whose data equals @a _value.
	Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {}
	/// Matches a specific assembly item type or anything if not given.
	Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
	/// Matches a given instruction with given arguments.
	Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
	/// Sets this pattern to be part of the match group with the identifier @a _group.
	/// Inside one rule, all patterns in the same match group have to match expressions from the
	/// same expression equivalence class.
	/// @param _matchGroups map from group identifier to matched expression.
	/// NOTE(review): presumably a pointer to this map is retained in m_matchGroups, so it would have to
	/// outlive the pattern - confirm against the definition.
	void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
	/// @returns the identifier of the match group of this pattern (0 unless changed).
	unsigned matchGroup() const { return m_matchGroup; }
	/// @returns true if @a _expr, whose arguments are classes of @a _classes, is matched by this pattern.
	bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;

	/// @returns an assembly item built from this pattern.
	/// NOTE(review): presumably the item carries @a _location as its source location - confirm.
	AssemblyItem toAssemblyItem(SourceLocation const& _location) const;
	/// @returns a copy of the sub-patterns for the arguments.
	std::vector<Pattern> arguments() const { return m_arguments; }

	/// @returns the id of the matched expression if this pattern is part of a match group.
	Id id() const { return matchGroupValue().id; }
	/// @returns the data of the matched expression if this pattern is part of a match group.
	u256 const& d() const { return matchGroupValue().item->data(); }

	std::string toString() const;

private:
	/// Helper of matches() that checks a single item without its arguments (TODO confirm against the definition).
	bool matchesBaseItem(AssemblyItem const* _item) const;
	/// @returns the expression currently stored for the match group of this pattern.
	Expression const& matchGroupValue() const;

	/// Item type to match. Defaults to UndefinedItem so that the member is never left
	/// indeterminate, whichever constructor is used.
	AssemblyItemType m_type = UndefinedItem;
	/// Set to true by the constant-value constructors, together with m_data.
	bool m_requireDataMatch = false;
	u256 m_data = 0;
	std::vector<Pattern> m_arguments;
	unsigned m_matchGroup = 0;
	/// Non-owning; nullptr by default.
	std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
};

/**
 * Template for a new expression that can be built from matched patterns.
 */
struct ExpressionTemplate
{
	using Expression = ExpressionClasses::Expression;
	using Id = ExpressionClasses::Id;

	/// Creates a template from the pattern @a _pattern and the source location @a _location.
	explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location);

	/// @returns a textual representation of this template.
	std::string toString() const;

	/// Starts out false; the name suggests it marks whether @a id is set (NOTE(review): confirm in the constructor).
	bool hasId = false;
	/// Id of the matched expression, if available; Id(-1) by default.
	Id id = static_cast<Id>(-1);
	/// Otherwise, assembly item.
	AssemblyItem item = UndefinedItem;
	/// Templates for the arguments.
	std::vector<ExpressionTemplate> arguments;
};

}
}