'use strict';
let XRegExp = require('xregexp');
const RegEx = XRegExp.default || XRegExp;
// var unidecode = require('unidecode');
var porterStemmer = require('./stemmer/porter_stemmer.js');

// Quote-like characters (ASCII apostrophe, guillemets, curly/low quotes,
// CJK corner brackets, fullwidth and halfwidth forms). They are listed
// inside the negated class below, so the tokenizer regex does not treat
// them as separators.
const ignoreQuotes =
  '\u0027\u00ab\u00bb\u2018\u2019\u201a\u201b\u201e\u201f\u2039\u203a\u300c\u300d\u300e\u300f\u301d\u301e\u301f\ufE41\ufE42\ufE43\ufE44\uff02\uff07\uff62\uff63';

// Matches every run of characters that are NOT Latin letters, digits,
// a hyphen, or one of the quote characters above.
const tokenizeEnRe = RegEx(`[^\\p{Latin}\\d-${ignoreQuotes}]+`, 'g');

// Matches a hyphen at the very start of the string or directly after a
// space (the preceding space, when present, is part of the match).
const dashEnRe = RegEx('((^| )-)', 'g');

/**
 * Stems a single token with the Porter stemmer.
 *
 * @param {string} token - Word to stem.
 * @returns {string} The value returned by `porterStemmer.stem`, or the
 *   original token when that value is falsy (e.g. an empty string).
 */
function stemmer(token) {
  return porterStemmer.stem(token) || token;
}

/**
 * Breaks text into lowercase tokens.
 *
 * Every run matched by `tokenizeEnRe` and every dash matched by
 * `dashEnRe` is replaced with a single space; the result is lowercased,
 * split on whitespace, and empty pieces are discarded.
 *
 * @param {string} str - Text to tokenize.
 * @returns {string[]} Lowercased, non-empty tokens in input order.
 */
function tokenizer(str) {
  return str
    .replace(tokenizeEnRe, ' ')
    .replace(dashEnRe, ' ')
    .toLowerCase()
    .split(/\s+/)
    .filter((word) => word.length > 0);
}

/**
 * Breaks text into words while keeping the original letter case.
 *
 * Every run matched by `tokenizeEnRe` and every dash matched by
 * `dashEnRe` is replaced with a single space; the result is split on
 * whitespace and empty pieces are discarded.
 *
 * @param {string} str - Text to split.
 * @returns {string[]} Non-empty words in input order, case preserved.
 */
function splitByWords(str) {
  const spaced = str.replace(tokenizeEnRe, ' ').replace(dashEnRe, ' ');
  const words = [];
  for (const piece of spaced.split(/\s+/)) {
    if (piece !== '') {
      words.push(piece);
    }
  }
  return words;
}

/**
 * Removes quote characters and folds accented Latin-1 letters to their
 * plain ASCII base letter.
 *
 * All characters listed in `ignoreQuotes` are deleted first. Then the
 * Latin-1 Supplement letters in the table below are replaced one group
 * at a time (for example U+00C0..U+00C6 become "A", U+00F1 becomes "n").
 * Characters outside the table are left untouched.
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The text without quotes and with the listed accented
 *   letters replaced by ASCII letters.
 */
function replaceDiacritic(s) {
  // Each entry pairs a class of accented letters with its ASCII stand-in.
  // Explicit \u escapes are used instead of legacy octal escapes so the
  // covered code points are easy to check against a Unicode chart.
  const foldings = [
    [/[\u00C0-\u00C6]/g, 'A'],
    [/[\u00E0-\u00E6]/g, 'a'],
    [/[\u00C8-\u00CB]/g, 'E'],
    [/[\u00E8-\u00EB]/g, 'e'],
    [/[\u00CC-\u00CF]/g, 'I'],
    [/[\u00EC-\u00EF]/g, 'i'],
    // U+00D7 (multiplication sign) sits between U+00D6 and U+00D8; it is
    // not a letter, so the range is split to leave it alone.
    [/[\u00D2-\u00D6\u00D8]/g, 'O'],
    // Same for U+00F7 (division sign) between U+00F6 and U+00F8.
    [/[\u00F2-\u00F6\u00F8]/g, 'o'],
    [/[\u00D9-\u00DC]/g, 'U'],
    [/[\u00F9-\u00FC]/g, 'u'],
    [/\u00D1/g, 'N'],
    [/\u00F1/g, 'n'],
    [/\u00C7/g, 'C'],
    [/\u00E7/g, 'c']
  ];
  const quotesRe = RegEx('[' + ignoreQuotes + ']+', 'g');

  let folded = s.replace(quotesRe, '');
  for (const [pattern, plain] of foldings) {
    folded = folded.replace(pattern, plain);
  }

  return folded;
}

// Public API of this module.
module.exports = { stemmer, tokenizer, splitByWords, replaceDiacritic };
