'use strict';

var each = require('lodash/each');
var without = require('lodash/without');
var filter = require('lodash/filter');
var has = require('lodash/has');
var map = require('lodash/map');
var compact = require('lodash/compact');

// var unidecode = require('unidecode');
const unidecode = require('../../unidecode');
let XRegExp = require('xregexp');
XRegExp = XRegExp.default || XRegExp;
// Cache for the diacritic-matching RegExp used by replaceDiacritic(); it stays
// undefined until _createDiacriticRe() assigns it.
var diacriticalMarks;

// Matches each run of one or more Arabic-script characters (\p{Arabic}) and/or
// combining marks (\p{M}); tokenizer() uses it to extract tokens from text.
var tokenizArRe = XRegExp('[\\p{Arabic}\\p{M}]+', 'gi');
var arPhoneticDict = {
  a: ['ا', 'ة', ''],
  b: ['ب'],
  c: ['تش', 'چ‎'], // jshint ignore:line
  d: ['د', 'ض', 'ذ'],
  e: ['ا', 'ي', 'ـيه', 'ع', ''],
  f: ['ف'],
  g: ['ج', 'ق', 'غ'],
  h: ['ه', 'ح', 'خ', ''],
  i: ['ي', ''],
  j: ['ج'],
  k: ['ك', 'خ'],
  l: ['ل'],
  m: ['م'],
  n: ['ن'],
  o: ['و', ''],
  p: ['پ‎'], // jshint ignore:line
  q: ['ق'],
  r: ['ر'],
  s: ['ث', 'ش', 'س', 'ص'],
  t: ['ث', 'ت', 'ة', 'ط'],
  u: ['و', ''],
  v: ['و', 'ڤ‎', 'ڥ‎'], // jshint ignore:line
  w: ['و', 'ڤ‎', 'ڥ‎'], // jshint ignore:line
  x: [''],
  y: ['ي', ''],
  z: ['ذ', 'ظ', 'ز'],
  ʻ: ['ع'],
  ʿ: ['ع'],
  '‘': ['ع'],
  '`': ['ع'],
  ʼ: ['ء', 'أ', 'آ', 'إ', 'ئ', 'ؤ'],
  ʾ: ['ء', 'أ', 'آ', 'إ', 'ئ', 'ؤ'],
  '’': ['ء', 'أ', 'آ', 'إ', 'ئ', 'ؤ'],
  "'": ['ع', 'ء', 'أ', 'آ', 'إ', 'ئ', 'ؤ'],
  combination: {
    dh: ['ذ'],
    th: ['ث'],
    sh: ['ش'],
    ch: ['ش', 'تش', 'چ‎'], // jshint ignore:line
    kh: ['خ'],
    gh: ['غ'],
    tsh: ['تش', 'چ‎'], // jshint ignore:line
    ts: ['تش', 'چ‎'], // jshint ignore:line
    tch: ['تش', 'چ‎'], // jshint ignore:line
    ei: ['ي', 'ـيه'],
    ai: ['ي', 'ـيه'],
    eh: ['ي', 'ـيه'],
    eih: ['ي', 'ـيه'],
    aih: ['ي', 'ـيه'],
    ee: ['ي'],
    ou: ['و'],
    oo: ['و']
  }
};

/**
 * For every position in `letters`, collects the spellings of each
 * multi-letter combination (e.g. "sh", "tsh") that starts at that position.
 *
 * @param {string[]} letters - Single characters of a word, in order.
 * @param {Object} phoneticDict - Dictionary whose `combination` property maps
 *   multi-letter keys to arrays of spellings.
 * @returns {string[][]} One array per input position (same length as
 *   `letters`); a position where no combination starts gets an empty array.
 *   Spellings of shorter combinations come before those of longer ones.
 */
function _getLetterCombinations(letters, phoneticDict) {
  var combinations = phoneticDict.combination;
  var letterLen = letters.length;
  var grupedCombinations = [];

  for (var start = 0; start < letterLen; start++) {
    var found = [];
    // The candidate may extend up to and including the last letter, so a word
    // that consists of nothing but a combination ("sh", "tsh") is matched too.
    for (var size = 2; start + size <= letterLen; size++) {
      var candidate = letters.slice(start, start + size).join('');
      if (
        candidate.length === size &&
        Object.prototype.hasOwnProperty.call(combinations, candidate)
      ) {
        found = found.concat(combinations[candidate]);
      }
    }
    grupedCombinations.push(found);
  }

  return grupedCombinations;
}

/**
 * Expands per-position spelling alternatives into whole-word candidates.
 *
 * Groups are folded left to right: every candidate built so far is extended
 * with every alternative of the next group. When a candidate already ends with
 * the (single-character) alternative being appended, the un-extended candidate
 * is emitted as well, so the result contains both the doubled and the single
 * spelling.
 *
 * The input array is not modified.
 *
 * @param {string[][]} letters - One array of alternative spellings per
 *   position.
 * @returns {string[]} All candidate words, duplicates included. Fewer than two
 *   groups yields an empty array (existing behaviour, preserved).
 */
function _createWords(letters) {
  if (letters.length < 2) {
    return [];
  }

  var words = letters[0];
  for (var g = 1; g < letters.length; g++) {
    var nextLetters = letters[g];
    var extended = [];

    for (var w = 0; w < words.length; w++) {
      var current = words[w];
      var lastChar = current[current.length - 1];

      for (var n = 0; n < nextLetters.length; n++) {
        var next = nextLetters[n];
        if (lastChar === next) {
          extended.push(current);
        }
        extended.push(current + next);
      }
    }

    words = extended;
  }

  return words;
}

/**
 * Computes the Levenshtein edit distance between two values.
 *
 * Both arguments are converted with String() before comparison. A `null`
 * argument is treated as "missing": two nulls give 0, a single null gives the
 * length of the other value's string form.
 *
 * @param {*} str1 - First value.
 * @param {*} str2 - Second value.
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions turning one string into the other.
 */
function levenshtein(str1, str2) {
  var firstMissing = str1 === null;
  var secondMissing = str2 === null;
  if (firstMissing || secondMissing) {
    if (firstMissing && secondMissing) {
      return 0;
    }
    return String(firstMissing ? str2 : str1).length;
  }

  var source = String(str1);
  var target = String(str2);
  var width = source.length;

  // Distances from the empty prefix of `target` to every prefix of `source`.
  var row = [];
  for (var col = 0; col <= width; col++) {
    row.push(col);
  }

  // Roll a single row down the matrix, remembering the diagonal neighbour.
  for (var r = 1; r <= target.length; r++) {
    var diagonal = row[0];
    row[0] = r;
    for (var c = 1; c <= width; c++) {
      var above = row[c];
      if (source.charAt(c - 1) === target.charAt(r - 1)) {
        row[c] = diagonal;
      } else {
        row[c] = Math.min(above, row[c - 1], diagonal) + 1;
      }
      diagonal = above;
    }
  }

  return row[width];
}

/**
 * Maps each character of `word` to its possible Arabic spellings.
 *
 * Characters without an entry in `arPhoneticDict` are dropped. For every
 * remaining character the result holds its single-letter spellings followed by
 * the spellings of any multi-letter combination that starts at that character
 * (as reported by `_getLetterCombinations`).
 *
 * @param {string} word - Word to transliterate.
 * @returns {string[][]} One non-empty array of spellings per kept character.
 */
function getArabicLetter(word) {
  var characters = without(word.split(''), '');
  var knownLetters = filter(characters, function(character) {
    return has(arPhoneticDict, character);
  });

  var combosByPosition = _getLetterCombinations(knownLetters, arPhoneticDict);

  var spellings = map(knownLetters, function(character, position) {
    return arPhoneticDict[character].concat(combosByPosition[position]);
  });

  return spellings.filter(function(group) {
    return group.length !== 0;
  });
}

/**
 * Tells whether a list of spelling groups contains no usable spelling at all.
 *
 * @param {string[][]} letters - Groups of alternative spellings.
 * @returns {boolean} true when, after flattening one level, every entry is
 *   falsy (for example only empty strings) or there are no entries.
 */
function isEmptyLetters(letters) {
  var flattened = [].concat.apply([], letters);
  var usable = compact(flattened);
  return usable.length === 0;
}

/**
 * Generates, for each input word, the Arabic spelling candidates that are
 * closest to it.
 *
 * Candidates come from `getArabicLetter` + `_createWords`. Each candidate is
 * passed through `unidecode` and compared with the input word using
 * `levenshtein`; only the candidates with the smallest distance are kept.
 *
 * Words of 10 or more characters are skipped (presumably to bound the number
 * of generated combinations — TODO confirm).
 *
 * @param {string[]} words - Words to transliterate.
 * @returns {Object<string, string[]>} Map from input word to its unique
 *   closest candidates; words without any candidate are omitted.
 */
function generateWords(words) {
  var response = {};

  words
    .filter(function(word) {
      return word.length < 10;
    })
    .forEach(function(word) {
      var letters = getArabicLetter(word);
      var wordForms = isEmptyLetters(letters) ? [] : _createWords(letters);

      // Keys are the best candidates seen so far; an object de-duplicates them.
      var closest = {};
      var min;

      wordForms.forEach(function(wordForm) {
        var lev = levenshtein(word, unidecode(wordForm));

        // Compare against `undefined` explicitly: a distance of 0 is a valid
        // (and the best possible) minimum and must not be treated as "unset".
        if (min === undefined || lev < min) {
          closest = {};
          min = lev;
        }

        if (lev === min) {
          closest[wordForm] = null;
        }
      });

      var bestForms = Object.keys(closest);
      if (bestForms.length !== 0) {
        response[word] = bestForms;
      }
    });

  return response;
}

/**
 * Produces a Soundex-style numeric code for an Arabic or Latin-script word.
 * See http://research.ijcaonline.org/volume34/number10/pxc3876054.pdf
 *
 * One leading alif variant and then any leading run of Latin vowels / "y" /
 * apostrophes are removed. Each remaining character that has a code
 * contributes its digit unless that digit equals the previously emitted one;
 * characters without a code are skipped and do not reset that comparison.
 *
 * @param {string} s - Word to encode (lookup is case-sensitive).
 * @returns {string} '' when no character has a code, otherwise exactly four
 *   digits (right-padded with '0' or truncated).
 */
function soundEx(s) {
  var codes = {
    ف: 1,
    ب: 1,
    خ: 2,
    ج: 2,
    ز: 2,
    س: 2,
    ص: 2,
    ظ: 2,
    ق: 2,
    ك: 2,
    ت: 3,
    ث: 3,
    د: 3,
    ذ: 3,
    ض: 3,
    ط: 3,
    ل: 4,
    م: 5,
    ن: 5,
    ر: 6,
    b: 1,
    f: 1,
    p: 1,
    v: 1,
    w: 1,
    c: 2,
    g: 2,
    j: 2,
    k: 2,
    q: 2,
    s: 2,
    x: 2,
    z: 2,
    d: 3,
    t: 3,
    l: 4,
    m: 5,
    n: 5,
    r: 6
  };

  var trimmed = s.replace(/^[إآأا]/g, '').replace(/^[aeiouy']{1,}/g, '');

  var encoded = '';
  var lastCode = null; // no digit emitted yet
  for (var idx = 0; idx < trimmed.length; idx++) {
    var character = trimmed.charAt(idx);
    if (!Object.prototype.hasOwnProperty.call(codes, character)) {
      continue;
    }
    var code = codes[character];
    if (code !== lastCode) {
      lastCode = code;
      encoded += code;
    }
  }

  if (encoded.length === 0) {
    return encoded;
  }
  return (encoded + '000').slice(0, 4);
}

/**
 * Builds the RegExp that matches runs of Arabic diacritics, stores it in the
 * module-level `diacriticalMarks` cache and returns it.
 *
 * @returns {RegExp} The (global) diacritic-matching expression.
 */
var _createDiacriticRe = function() {
  // Combining marks: U+064B..U+0652 (tanween, short vowels, shadda, sukun),
  // U+0653 (maddah above) and U+0670 (superscript alef).
  var combiningMarks =
    '\\u064b\\u064c\\u064d\\u064e\\u064f\\u0650\\u0651\\u0652\\u0653\\u0670';
  // Presentation forms: shadda ligatures and isolated forms of the marks.
  var isolatedForms =
    '\\ufc5e\\ufc5f\\ufc60\\ufc61\\ufc62\\ufc63\\ufe70\\ufe72\\ufe76\\ufe74\\ufe78\\ufe7A\\ufe7c\\ufe7e';
  // Presentation forms: medial (tatweel-carried) forms of the marks.
  var medialForms =
    '\\ufcf2\\ufcf3\\ufcf4\\ufe71\\ufe77\\ufe79\\ufe7B\\ufe7D\\ufe7f';

  diacriticalMarks = new RegExp(
    '[' + combiningMarks + isolatedForms + medialForms + ']+',
    'gi'
  );
  // Returning the expression lets callers use it on the very first call,
  // before the cache variable has been read back.
  return diacriticalMarks;
};

/**
 * Removes Arabic diacritics from a string, collapses every run of two or more
 * whitespace characters into a single space and trims the result.
 *
 * @param {string} someString - Text to clean.
 * @returns {string} The cleaned text.
 */
function replaceDiacritic(someString) {
  var diacriticalMarksRe = diacriticalMarks || _createDiacriticRe();
  return someString
    .replace(diacriticalMarksRe, '')
    .replace(/\s{2,}/g, ' ')
    .trim();
}

/**
 * Reduces an Arabic word to a stem / candidate root.
 *
 * Stages, applied in order (each works on the previous stage's output and
 * re-checks `stem.length` before applying its rules):
 *   1. strip combining marks (\p{M});
 *   2. strip three- and two-letter prefixes;
 *   3. strip three- and two-letter suffixes;
 *   4. collapse a doubled initial waw;
 *   5. normalize an initial hamza-carrying alif to bare alif;
 *   6. undo known word patterns for stems of length 6, then 5, then 4.
 *
 * @param {string} input - A single Arabic word.
 * @returns {string} The stem. A stem of three characters or fewer after
 *   stage 5 is returned immediately; otherwise the result of the pattern
 *   stages is returned, whatever its length.
 */
function stemmer(input) {
  // Flip to true to print the intermediate stem after every stage.
  var isActiveDebugMod = false;
  var stem = input;

  function trace(message) {
    if (isActiveDebugMod) {
      console.log(message);
    }
  }

  // Stemming step 1. Strip diacritics
  stem = XRegExp.replace(stem, XRegExp('\\p{M}', 'g'), ''); // jshint ignore:line
  trace('step 1. : ' + input + ' -> ' + stem);

  // Stemming step 2. remove length three and length two prefixes in this order
  if (stem.length >= 6) {
    stem = stem.replace(/^(كال|بال|ولل|وال)(.*)$/i, '$2');
  }
  if (stem.length >= 5) {
    stem = stem.replace(/^(ال|لل)(.*)$/i, '$2');
  }
  trace('step 2. : ' + stem);

  // Stemming step 3. remove length three and length two suffixes in this order
  if (stem.length >= 6) {
    stem = stem.replace(/^(.*)(تما|هما|تان|تين|كما)$/i, '$1');
  }
  if (stem.length >= 5) {
    stem = stem.replace(
      /^(.*)(ون|ات|ان|ين|تن|كم|هن|نا|يا|ها|تم|كن|ني|وا|ما|هم)$/i,
      '$1'
    );
  }
  trace('step 3. : ' + stem);

  // Stemming step 4. collapse a doubled initial waw into a single one
  if (stem.length >= 4) {
    stem = stem.replace(/^وو/i, 'و');
  }
  trace('step 4. : ' + stem);

  // Stemming step 5. normalize initial hamza to bare alif
  if (stem.length >= 4) {
    stem = stem.replace(/^[آأإ]/i, 'ا');
  }
  trace('step 5. : ' + stem);

  if (stem.length <= 3) {
    return stem;
  }

  // Stemming step 6. process length six patterns and extract length three roots
  if (stem.length === 6) {
    stem = stem.replace(/^[ام]ست(...)$/i, '$1'); // مستفعل - استفعل
    stem = stem.replace(/^[تم](.)ا(.)ي(.)$/i, '$1$2$3'); // تفاعيل - مفاعيل
    stem = stem.replace(/^م(..)ا(.)ة$/i, '$1$2'); // مفعالة
    stem = stem.replace(/^ا(.)[تط](.)ا(.)$/i, '$1$2$3'); // افتعال
    stem = stem.replace(/^ا(.)(.)و\2(.)$/i, '$1$2$3'); // افعوعل
    if (stem.length === 3) {
      return stem;
    }

    stem = stem.replace(/[ةهيكتان]$/i, ''); // single letter suffixes
    //if (stem.length === 4 ) { TODO: initiate 4 letter word routine? }
    //if (stem.length === 5 ) { TODO: initiate 5 letter word routine? }
    stem = stem.replace(/^(..)ا(..)$/i, '$1$2'); // فعالل
    stem = stem.replace(/^ا(...)ا(.)$/i, '$1$2'); // افعلال
    // Four wildcard letters after the prefix; a stray "۔" used to sit in this
    // group and prevented the rule from ever matching.
    stem = stem.replace(/^مت(....)$/i, '$1'); // متفعلل

    stem = stem.replace(/^[لبفسويتنامك]/i, ''); // single letter prefixes، added م for مفعلل
    if (stem.length === 6) {
      stem = stem.replace(/^(..)ا(.)ي(.)$/i, '$1$2$3'); // فعاليل
    }
  }
  trace('after length 6 : ' + stem);

  // Length five patterns; also reached by stems shortened in the stage above.
  if (stem.length === 5) {
    stem = stem.replace(/^ا(.)[اتط](.)(.)$/i, '$1$2$3'); //   افتعل   -  افاعل
    stem = stem.replace(/^م(.)(.)[يوا](.)$/i, '$1$2$3'); //   مفعول  -   مفعال  -   مفعيل
    stem = stem.replace(/^[اتم](.)(.)(.)ة$/i, '$1$2$3'); //   مفعلة  -    تفعلة   -  افعلة
    stem = stem.replace(/^[يتم](.)[تط](.)(.)$/i, '$1$2$3'); //   مفتعل  -    يفتعل   -  تفتعل
    stem = stem.replace(/^[تم](.)ا(.)(.)$/i, '$1$2$3'); //   مفاعل  -  تفاعل
    stem = stem.replace(/^(.)(.)[وا](.)ة$/i, '$1$2$3'); //   فعولة  -   فعالة
    stem = stem.replace(/^[ما]ن(.)(.)(.)$/i, '$1$2$3'); //   انفعل   -   منفعل
    stem = stem.replace(/^ا(.)(.)ا(.)$/i, '$1$2$3'); //    افعال
    stem = stem.replace(/^(.)(.)(.)ان$/i, '$1$2$3'); //    فعلان
    stem = stem.replace(/^ت(.)(.)ي(.)$/i, '$1$2$3'); //    تفعيل
    stem = stem.replace(/^(.)ا(.)و(.)$/i, '$1$2$3'); //    فاعول
    stem = stem.replace(/^(.)وا(.)(.)$/i, '$1$2$3'); //    فواعل
    stem = stem.replace(/^(.)(.)ائ(.)$/i, '$1$2$3'); //    فعائل
    stem = stem.replace(/^(.)ا(.)(.)ة$/i, '$1$2$3'); //    فاعلة
    stem = stem.replace(/^(.)(.)ا(.)ي$/i, '$1$2$3'); //    فعالي
    if (stem.length === 3) {
      return stem;
    }

    stem = stem.replace(/^[اتم]/i, ''); //    تفعلل - افعلل - مفعلل

    stem = stem.replace(/[ةهيكتان]$/i, ''); // single letter suffixes
    //if (stem.length === 4 ) { TODO: initiate 4 letter word routine? }
    stem = stem.replace(/^(..)ا(..)$/i, '$1$2'); //    فعالل
    stem = stem.replace(/^(...)ا(.)$/i, '$1$2'); //    فعلال
    stem = stem.replace(/^[لبفسويتنامك]/i, ''); // single letter prefixes، added م for مفعلل
  }
  trace('after length 5 : ' + stem);

  // Length four patterns; also reached by stems shortened in the stages above.
  if (stem.length === 4) {
    stem = stem.replace(/^م(.)(.)(.)$/i, '$1$2$3'); // مفعل
    stem = stem.replace(/^(.)ا(.)(.)$/i, '$1$2$3'); // فاعل
    stem = stem.replace(/^(.)(.)[يوا](.)$/i, '$1$2$3'); // فعال   -   فعول    - فعيل
    stem = stem.replace(/^(.)(.)(.)ة$/i, '$1$2$3'); // فعلة
    if (stem.length === 3) {
      return stem;
    }

    stem = stem.replace(/^(.)(.)(.)[ةهيكتان]$/i, '$1$2$3'); // single letter suffixes
    if (stem.length === 3) {
      return stem;
    }
    stem = stem.replace(/^[لبفسويتناك](.)(.)(.)$/i, '$1$2$3'); // single letter prefixes
  }
  trace('after length 4 : ' + stem);

  return stem;
}

/**
 * Extracts the tokens matched by `tokenizArRe` from a string.
 *
 * @param {string} str - Text to tokenize.
 * @returns {string[]} The matched tokens in order of appearance, or an empty
 *   array when nothing matches.
 */
function tokenizer(str) {
  return str.match(tokenizArRe) || [];
}

module.exports = {
  generateWords: generateWords,
  soundEx: soundEx,
  replaceDiacritic: replaceDiacritic,
  stemmer: stemmer,
  tokenizer: tokenizer
};
