/*
  This file is part of "Scopa" - An italian card game.
  Copyright (C) 2009  Tim Teulings

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "PlayerNeuron.h"

#include <cassert>
#include <cstdlib>

#include <iostream>

// We have a neuron for:
// My cards in hand => 40, [0..1]
// Cards on the table => 40, [0..1]
// iteration => 1, [0/17...17/17]
// Me/Opponent (one shared set of neurons: what I won counts positive,
// what the opponent won counts negative):
//   Every card won => 40  [0..1]
//   Number of cards won => 1, [0/40..40/40]
//   Number of sevens we won => 1, [0/4..4/4]
//   Number of denare we won => 1, [0/10..10/10]
//   Did we catch the denare 7 => 1, [0..1]
//   Maximum number of points won for each color => 4, [0/21..21/21]
//   Number of scopas => 1, [0/18..18/18]

// Layout of the network input vector. Every offset is the start of one group
// of neurons; it equals the previous offset plus the width of the previous
// group (the width is noted behind each line).
const size_t handOffset        = 0;                      // 40 neurons
const size_t tableOffset       = handOffset        + 40; // 40 neurons
const size_t iterationOffset   = tableOffset       + 40; //  1 neuron

const size_t wonOffset         = iterationOffset   + 1;  // 40 neurons
const size_t wonCountOffset    = wonOffset         + 40; //  1 neuron
const size_t sevensCountOffset = wonCountOffset    + 1;  //  1 neuron
const size_t denareCountOffset = sevensCountOffset + 1;  //  1 neuron
const size_t denareSevenOffset = denareCountOffset + 1;  //  1 neuron
const size_t colorPointsOffset = denareSevenOffset + 1;  //  4 neurons
const size_t scopaCountOffset  = colorPointsOffset + 4;  //  1 neuron

// Sizes of the three network layers. The input layer ends right after the
// single scopa neuron, which is the last group of the layout.
const size_t inputCount  = scopaCountOffset + 1;
const size_t hiddenCount = 100;
const size_t outputCount = 1;

// Training parameters
const double learningRate = 0.001;
const double momentum     = 0.9;
const double gamma        = 0.7;

void NeuronPlayer::GetInput(std::vector<double>& input, const State& state, size_t round)
{
  input.resize(inputCount);

  // Hand
  for (Card card=Card::minCard; card<=Card::maxCard; ++card) {
    input[handOffset+card.GetIndex()]=0.0;
  }

  for (size_t i=0; i<state.hand.size(); i++) {
    input[handOffset+state.hand[i].GetIndex()]=1.0;
  }

  // Table
  for (Card card=Card::minCard; card<=Card::maxCard; ++card) {
    input[tableOffset+card.GetIndex()]=0.0;
  }

  for (size_t i=0; i<state.table.size(); i++) {
    input[tableOffset+state.table[i].GetIndex()]=1.0;
  }

  input[iterationOffset]=(round-1)/17;

  // Cards won
  for (Card card=Card::minCard; card<=Card::maxCard; ++card) {
    input[wonOffset+card.GetIndex()]=0.0;
  }

  for (size_t i=0; i<state.myCards.size(); i++) {
    input[wonOffset+state.myCards[i].GetIndex()]=1.0;
  }

  for (size_t i=0; i<state.yourCards.size(); i++) {
    input[wonOffset+state.yourCards[i].GetIndex()]=-1.0;
  }

  // Cards won count
  input[wonCountOffset]=(state.myCards.size()-state.yourCards.size())/40.0;

  double count;

  // Number of sevens won
  count=0;
  for (size_t i=0; i<state.myCards.size(); i++) {
    if (state.myCards[i].GetValue()==7) {
      count++;
    }
  }

  for (size_t i=0; i<state.yourCards.size(); i++) {
    if (state.yourCards[i].GetValue()==7) {
      count--;
    }
  }
  input[sevensCountOffset]=count/4.0;

  // Number of denare won
  count=0;
  for (size_t i=0; i<state.myCards.size(); i++) {
    if (state.myCards[i].GetColor()==Card::denare) {
      count++;
    }
  }

  for (size_t i=0; i<state.yourCards.size(); i++) {
    if (state.yourCards[i].GetColor()==Card::denare) {
      count--;
    }
  }
  input[denareCountOffset]=count/10.0;

  // Denare 7 won
  count=0;
  for (size_t i=0; i<state.myCards.size(); i++) {
    if (state.myCards[i].GetValue()==7 &&
        state.myCards[i].GetColor()==Card::denare) {
      count++;
    }
  }

  for (size_t i=0; i<state.yourCards.size(); i++) {
    if (state.yourCards[i].GetValue()==7 &&
        state.yourCards[i].GetColor()==Card::denare) {
      count--;
    }
  }
  input[denareSevenOffset]=count/10.0;

  // Color value count
  size_t mmax[4];
  size_t ymax[4];

  mmax[0]=0;
  mmax[1]=0;
  mmax[2]=0;
  mmax[3]=0;

  for (size_t i=0; i<state.myCards.size(); i++) {
    mmax[state.myCards[i].GetColor()-1]=std::max(mmax[state.myCards[i].GetColor()-1],
                                                 state.myCards[i].GetPoints());
  }

  ymax[0]=0;
  ymax[1]=0;
  ymax[2]=0;
  ymax[3]=0;

  for (size_t i=0; i<state.yourCards.size(); i++) {
    ymax[state.yourCards[i].GetColor()-1]=std::max(ymax[state.yourCards[i].GetColor()-1],
                                                   state.yourCards[i].GetPoints());
  }

  for (size_t i=0; i<4; i++) {
    input[colorPointsOffset+i]=(mmax[i]-ymax[i])/21.0;
  }

  // TODO: Number of scopas
  input[scopaCountOffset]=0.0;
}

// Creates the player together with its network. The layer sizes are taken
// from the shared constants so that the network always matches the buffers
// that Initialize() sizes with the same constants.
NeuronPlayer::NeuronPlayer()
 : nn(inputCount,hiddenCount,outputCount)
{
  // no code
}

// Resets all per-game bookkeeping: the cards won by both players, the
// learning buffers (error gradients and weight deltas) and the per-round
// history. The weights of the network itself are not touched here.
void NeuronPlayer::Initialize()
{
  // Number of entries in each per-round history buffer
  const size_t roundSlots=18;

  // Cards won so far, with room for 40 cards each
  yourCards.clear();
  yourCards.reserve(40);

  myCards.clear();
  myCards.reserve(40);

  // Error gradients of the hidden and the output layer
  heg.clear();
  heg.resize(hiddenCount);

  oeg.clear();
  oeg.resize(outputCount);

  // Weight deltas: input->hidden, indexed [input][hidden]...
  dih.clear();
  dih.resize(inputCount);
  for (size_t in=0; in<inputCount; ++in) {
    dih[in].resize(hiddenCount);
  }

  // ...and hidden->output, indexed [hidden][output]
  dho.clear();
  dho.resize(hiddenCount);
  for (size_t hid=0; hid<hiddenCount; ++hid) {
    dho[hid].resize(outputCount);
  }

  // Training target handed to Learn()
  expected.clear();
  expected.resize(outputCount,0);

  // Per-round history
  points.clear();
  points.resize(roundSlots);

  inputs.clear();
  inputs.resize(roundSlots);

  output.clear();
  output.resize(roundSlots);

  rewards.clear();
  rewards.resize(roundSlots);

  for (size_t slot=0; slot<roundSlots; ++slot) {
    inputs[slot].resize(inputCount,0);
    output[slot].resize(outputCount,0);
    rewards[slot].resize(outputCount,0);
  }
}

// Chooses the move for the current round.
//
// For every possible move the resulting state is built and rated by the
// network. In 90% of the cases the best rated move is played, in 10% a random
// one (exploration). The input vector and the rating of the played move are
// remembered for the learning step in RoundFinished().
//
// table: the current table; it supplies the possible moves for our hand.
// round: 1-based number of the current round (1..18).
//
// Returns the move to play. In round 18 the first possible move is returned
// without any evaluation.
SolutionSet NeuronPlayer::PlayCard(const Table& table, size_t round)
{
  assert(!hand.empty());
  assert(round>=1 && round<=18);

  std::vector<SolutionSet> solutions;
  std::vector<State>       states;

  table.GetSolutionSets(hand,solutions);

  assert(solutions.size()>0);

  if (round==18) {
    // We do not evaluate anything for the last move...
    return solutions[0];
  }

  //
  // Step 1: Calculate the new state for each solution
  //
  states.reserve(solutions.size());

  for (size_t i=0; i<solutions.size(); i++) {
    State state;

    // We own the cards we already won...
    state.myCards=myCards;
    state.yourCards=yourCards;

    // Calculate new table as a result of playing this solution
    state.table=table.table;

    if (solutions[i].IsTrick()) {
      // We win the card we played...
      state.myCards.push_back(solutions[i].yours);

      for (size_t j=0; j<solutions[i].table.size(); j++) {
        std::vector<Card>::iterator iter;

        iter=state.table.begin();
        while (iter!=state.table.end() && *iter!=solutions[i].table[j]) {
          ++iter;
        }
        assert(iter!=state.table.end());

        // ...and the cards we take from the table...
        state.myCards.push_back(solutions[i].table[j]);
        // ...which in turn disappear from the table.
        state.table.erase(iter);
      }
    }
    else {
      // No trick: the played card just stays on the table
      state.table.push_back(solutions[i].yours);
    }

    // Fix hand
    state.hand=hand;
    state.hand.erase(state.hand.begin()+solutions[i].yoursPos);

    states.push_back(state);
  }

  //
  // Step 2: Rate every resulting state
  //

  std::vector<double>               values;
  // Named differently from the member "inputs" to avoid shadowing it
  std::vector<std::vector<double> > candidateInputs;

  values.resize(states.size());
  candidateInputs.resize(states.size());

  for (size_t i=0; i<states.size(); i++) {
    GetInput(candidateInputs[i],states[i],round);

    // Load the complete input vector of this candidate into the network
    for (size_t j=0; j<candidateInputs[i].size(); j++) {
      nn.SetInput(j,candidateInputs[i][j]);
    }

    nn.CalculateOutputFromInput();

    values[i]=nn.GetOutput(0);
  }

  //
  // Step 3: Choose the move
  //

  size_t maxPos=0;

  // In 10% of cases we choose a random move, else we choose the best move
  if (1.0*rand()/RAND_MAX>=0.1) {
    for (size_t i=1; i<values.size(); i++) {
      if (values[i]>values[maxPos]) {
        maxPos=i;
      }
    }
  }
  else {
    // Dividing by RAND_MAX+1.0 keeps the index strictly below states.size(),
    // even if rand() returns RAND_MAX.
    maxPos=static_cast<size_t>(double(states.size())*rand()/(RAND_MAX+1.0));
  }

  // The rating of the move that is actually played (also for a random move)
  const double maxValue=values[maxPos];

  // Leave the network in the state of the chosen move: Learn() reads the
  // inputs and the hidden activations back from the network, so both have to
  // belong to the move that was played.
  for (size_t j=0; j<candidateInputs[maxPos].size(); j++) {
    nn.SetInput(j,candidateInputs[maxPos][j]);
  }
  nn.CalculateOutputFromInput();

  this->inputs[round-1]=candidateInputs[maxPos];

  for (size_t k=0; k<outputCount; k++) {
    this->output[round-1][k]=maxValue;
  }

  return solutions[maxPos];
}

void NeuronPlayer::CardsPlayed(std::vector<Card>& cards,
                               bool ownCards,
                               size_t round)
{
  for (size_t i=0; i<cards.size(); i++) {
    if (ownCards) {
      //std::cout << round << ": " << values[round]-((int)pointsMe-(int)pointsOther) << std::endl;
      myCards.push_back(cards[i]);
    }
    else {
      yourCards.push_back(cards[i]);
    }
  }
}

// Called after each round with the current score of both players.
//
// Stores the point difference and the reward of the round and, from round 2
// on, trains the network towards the current point difference. Prints
// diagnostics for every 100th game and dumps all weights after every 1000th
// game.
//
// round:       1-based number of the finished round.
// pointsMe:    my points so far.
// pointsOther: the points of the opponent so far.
void NeuronPlayer::RoundFinished(size_t round, size_t pointsMe, size_t pointsOther)
{
  // Number of finished games (a game ends with round 18); only used to
  // throttle the output below.
  static size_t count=0;

  assert(round>=1 && round<=points.size());

  const size_t current=round-1;

  // Store the point difference for every round, including the first one.
  // The reward of round 2 is calculated against this value.
  points[current]=static_cast<int>(pointsMe)-static_cast<int>(pointsOther);

  // The reward is the change of the point difference caused by this round
  for (size_t k=0; k<outputCount; k++) {
    if (round>1) {
      rewards[current][k]=points[current]-points[current-1];
    }
    else {
      rewards[current][k]=points[current];
    }
  }

  if (round!=1) {
    for (size_t k=0; k<outputCount; k++) {
      // We have a maximum reward loss/win of 5 (one scopa and losing all 4 points).
      // Disabled alternative target:
      // output[round-2][k]+rewards[round-1][k]+gamma*output[round-1][k]-output[round-2][k]
      expected[k]=points[current];

      if (count%100==0) {
        std::cout << "Round: " << round << " " << pointsMe << ":" << pointsOther << " - " << rewards[current][k] << " <=> " << output[current][k] << " => " << expected[k] << std::endl;
      }
    }
    Learn(expected,output[current]);
  }

  // Dump all weights at the end of every 1000th game
  if (count%1000==0 && round==18) {
    for (size_t k=0; k<outputCount; k++) {
      for (size_t j=0; j<hiddenCount; j++) {
        std::cout << "OW(" << k << "," << j << "): " << nn.GetOutputWeight(k,j) << std::endl;

        for (size_t i=0; i<inputCount; i++) {
          std::cout << "HW(" << j << "," << i << "): " << nn.GetHiddenWeight(j,i) << std::endl;
        }
      }
    }
  }

  if (round==18) {
    count++;
  }
}

// One backpropagation step with momentum.
//
// expected: the target value for each output neuron.
// received: the value each output neuron actually produced.
//
// The hidden activations and the inputs are read back from the network, so
// the network still has to hold the state that produced "received".
// All gradients and weight deltas are calculated from the current weights
// first; the weights are only changed afterwards.
void NeuronPlayer::Learn(const std::vector<double>& expected, const std::vector<double>& received)
{
  // Output layer: error gradient and delta of the hidden->output weights.
  // The y*(1-y) factor is the derivative of a logistic activation
  // (presumably what the network uses - verify against its implementation).
  for (size_t outIdx=0; outIdx<outputCount; ++outIdx) {
    const double actual=received[outIdx];

    oeg[outIdx]=actual*(1-actual)*(expected[outIdx]-actual);

    for (size_t hidIdx=0; hidIdx<hiddenCount; ++hidIdx) {
      dho[hidIdx][outIdx]=learningRate*nn.GetHidden(hidIdx)*oeg[outIdx]+momentum*dho[hidIdx][outIdx];
    }
  }

  // Hidden layer: propagate the output error back through the (still
  // unchanged) output weights, then calculate the input->hidden deltas.
  for (size_t hidIdx=0; hidIdx<hiddenCount; ++hidIdx) {
    double propagatedError=0;

    for (size_t outIdx=0; outIdx<outputCount; ++outIdx) {
      propagatedError+=nn.GetOutputWeight(outIdx,hidIdx)*oeg[outIdx];
    }

    const double activation=nn.GetHidden(hidIdx);

    heg[hidIdx]=activation*(1-activation)*propagatedError;

    for (size_t inIdx=0; inIdx<inputCount; ++inIdx) {
      dih[inIdx][hidIdx]=learningRate*nn.GetInput(inIdx)*heg[hidIdx]+momentum*dih[inIdx][hidIdx];
    }
  }

  // Apply the deltas to the input->hidden weights...
  for (size_t inIdx=0; inIdx<inputCount; ++inIdx) {
    for (size_t hidIdx=0; hidIdx<hiddenCount; ++hidIdx) {
      const double updated=nn.GetHiddenWeight(hidIdx,inIdx)+dih[inIdx][hidIdx];

      nn.SetHiddenWeight(hidIdx,inIdx,updated);
    }
  }

  // ...and to the hidden->output weights
  for (size_t hidIdx=0; hidIdx<hiddenCount; ++hidIdx) {
    for (size_t outIdx=0; outIdx<outputCount; ++outIdx) {
      const double updated=nn.GetOutputWeight(outIdx,hidIdx)+dho[hidIdx][outIdx];

      nn.SetOutputWeight(outIdx,hidIdx,updated);
    }
  }
}
