/**
 * Given a Spanish phrase, break it on words and syllables.
 * @param {string} phrase the text to split.  Accented characters are
 *   represented as a vowel or 'n' followed by a `.
 * @return {Array.<string>} an array of strings that when joined on the empty
 *   string yield the input.  Each element is either a whitespace or punctuation
 *   token which separates words, or a syllable.  A ` that does not follow a
 *   vowel or 'n' is kept as a separator token of its own.
 */
var breakSyllables = (function () {

function breakSyllables(phrase) {
  var chunks = breakWords(phrase);
  var syllables = [];
  for (var i = 0, n = chunks.length; i < n; ++i) {
    var chunk = chunks[i];
    // Only chunks containing a letter are words; everything else is passed
    // through untouched.
    if (/[a-z]/i.test(chunk)) {
      breakOneWord(chunk, syllables);
    } else {
      syllables.push(chunk);
    }
  }
  return syllables;
}

/**
 * Splits a phrase into alternating word and separator chunks.
 * @param {string} phrase
 * @return {Array.<string>} chunks that concatenate back to phrase.
 */
function breakWords(phrase) {
  // Separate runs of non word characters such as space and punctuation from
  // words.  The last alternative picks up ` marks that are not attached to a
  // vowel or 'n'; without it they would be dropped from the output and the
  // chunks would no longer join back to the input.
  return phrase.match(/[^a-z`]+|(?:[aeioun]`|[a-z])+|`+/ig) || [];
}

var PHONEME = new RegExp(
    ''
    // Diphthongs consist of one strong (a, e, o) and one unaccented weak vowel
    // (i, u, or y at the end).
    + 'a`?i(?!`)|a`?u(?!`)|e`?i(?!`)|ia`?|ie`?|io`?|ua`?|ue`?|ui`?|uo`?'
    + '|o`?i(?!`)|o`?u(?!`)'
    + '|a`?y$|e`?y$|o`?y$'
    // Gue and Gui with or without an umlaut over the u do form a phoneme,
    // so we need to treat them as a phoneme, and then break the 'g' out later
    + '|gu`?[ei]`?'
    // Some consonant pairs are single phonemes
    + '|ch|ll|rr|qu|s[chpt]|[bcdgkpt]r|ng(?!u`?[aeiou])'
    // And a single accented vowel or letter is a phoneme
    + '|[aeioun]`'
    + '|.',
    'gi'
    );

/**
 * Splits one word into syllables and appends them to syllables.
 * @param {string} word a non-empty word chunk as produced by breakWords.
 * @param {Array.<string>} syllables output array that is appended to.
 */
function breakOneWord(word, syllables) {
  // Break roughly into phonemes and then reassemble into syllables.
  var phonemes = word.match(PHONEME);
  var i;
  // Final break into phonemes.  Split gu`e and gu`i into separate phonemes.
  // Walking backwards keeps the indices still to be visited stable while
  // splicing.
  for (i = phonemes.length; --i >= 0;) {
    var phoneme = phonemes[i];
    if (/^gu`?[ei]`?$/i.test(phoneme)) {
      phonemes.splice(i, 1, phoneme.substring(0, 1), phoneme.substring(1));
    }
  }
  var n = phonemes.length;
  var isConsonant = [];
  for (i = 0; i < n; ++i) {
    var lowered = phonemes[i].toLowerCase();
    // 'qu' contains a vowel letter but is classified as a consonant phoneme.
    isConsonant[i] = lowered === 'qu' || !/[aeiou]/.test(lowered);
  }
  for (i = 0; i < n;) {
    var syllableStart = i;
    // A syllable is any number of consonant phonemes followed by one vowel
    // phoneme followed by an optional consonant phoneme.
    while (i < n && isConsonant[i]) { ++i; }
    if (i < n && !isConsonant[i]) { ++i; }
    // Now i cannot be syllableStart since isConsonant[i] must be true or
    // false and the preceding two statements check for both possibilities.

    // Consume the next consonant if it is not immediately followed by a vowel.
    if (i < n && isConsonant[i] && (i + 1 === n || isConsonant[i + 1])) { ++i; }

    syllables.push(phonemes.slice(syllableStart, i).join(''));
  }
}

return breakSyllables;
})();

/**
 * Like emphasis, but returns null if the emphasis is not made explicit by an
 * accent.
 * @param {Array.<string>} syllables the syllables to inspect.
 * @param {int} wordStart index of the first syllable to check.
 * @param {int} wordEnd index past the last syllable to check.
 * @return {int|null} the index of the last syllable between wordStart and
 *     wordEnd with explicit emphasis or null if no such syllable exists between
 *     wordStart and wordEnd.
 */
function explicitEmphasis(syllables, wordStart, wordEnd) {
  // Only the last explicitly accented syllable is wanted, so scan from the end
  // and stop at the first hit.
  for (var i = wordEnd; --i >= wordStart;) {
    if (indexOfExplicitEmphasis(syllables, i) >= 0) {
      return i;
    }
  }
  return null;
}

/**
 * Finds the '`' mark, if any, that explicitly marks
 * syllables[syllableIndex] as the emphasized syllable.
 * @param {Array.<string>} syllables the syllables of the surrounding text.
 * @param {int} syllableIndex index into syllables of the syllable to inspect.
 * @return {int} the char index of the qualifying '`' within the syllable, or
 *     -1 when no mark in the syllable qualifies.
 */
function indexOfExplicitEmphasis(syllables, syllableIndex) {
  var text = syllables[syllableIndex];
  // The search starts at index 1: a mark at index 0 has nothing before it.
  var pos = text.indexOf('`', 1);
  for (; pos >= 0; pos = text.indexOf('`', pos + 1)) {
    var head = text.substring(0, pos);
    var tail = text.substring(pos + 1);

    // The mark has to sit directly after a vowel.
    var followsVowel = /[aeiou]$/i.test(head);

    // Not if the accent is causing the u to be part of a diphthong.
    var joinsGuDiphthong = /gu$/i.test(head) && /[ei](?!`)/i.test(tail);

    // Not if the accent is on a weak vowel and there is an adjacent
    // strong vowel in another syllable: either the syllable ends in the
    // accented weak vowel and the next one opens with a strong vowel ...
    var weakBeforeStrong =
        pos + 1 === text.length
        && /[iuy]$/i.test(head)
        && syllableIndex + 1 < syllables.length
        && /^[aeo]/i.test(syllables[syllableIndex + 1]);

    // ... or the syllable opens with the accented weak vowel and the previous
    // one closes with a strong vowel.
    var weakAfterStrong =
        pos === 1
        && /^[iuy]/i.test(head)
        && syllableIndex > 0
        && /[aeo]`?$/i.test(syllables[syllableIndex - 1]);

    if (followsVowel && !joinsGuDiphthong
        && !weakBeforeStrong && !weakAfterStrong) {
      return pos;
    }
  }
  return -1;
}

/**
 * Returns the syllable at index syllableIndex of syllables with its explicit
 * emphasis mark, if it has one, removed.
 * @param {Array.<string>} syllables the syllables of the surrounding text.
 * @param {int} syllableIndex index into syllables of the syllable to strip.
 * @return {string} the syllable without the mark reported by
 *     indexOfExplicitEmphasis, or the syllable unchanged when there is none.
 */
function withoutEmphasis(syllables, syllableIndex) {
  var text = syllables[syllableIndex];
  var mark = indexOfExplicitEmphasis(syllables, syllableIndex);
  if (mark < 0) {
    return text;
  }
  // Cut out exactly the one character holding the mark.
  var beforeMark = text.substring(0, mark);
  var afterMark = text.substring(mark + 1);
  return beforeMark + afterMark;
}

/**
 * Returns the syllable at index syllableIndex of syllables with an explicit
 * emphasis mark added, unless it already carries one.
 * @param {Array.<string>} syllables the syllables of the surrounding text.
 * @param {int} syllableIndex index into syllables of the syllable to accent.
 * @return {string} the syllable with a '`' after its first strong vowel, or
 *     failing that after its first pronounced weak vowel; the syllable
 *     unchanged when it has no vowel to accent.
 */
function withEmphasis(syllables, syllableIndex) {
  var syllable = syllables[syllableIndex];
  if (indexOfExplicitEmphasis(syllables, syllableIndex) >= 0) {
    return syllable;
  }
  // If there is a strong vowel, accent it.
  var emphasisedSyllable = syllable.replace(
      /([aeo])`?/i, function (_, vowel) { return vowel + '`'; });
  if (emphasisedSyllable !== syllable) {
    return emphasisedSyllable;
  }
  // Else if there is a weak vowel, accent the first one that is pronounced.
  for (var i = 0, n = syllable.length; i < n; ++i) {
    var letter = syllable.charAt(i).toLowerCase();
    if (letter !== 'i' && letter !== 'u') { continue; }
    if (letter === 'u' && i > 0) {
      var before = syllable.charAt(i - 1).toLowerCase();
      // The u of 'qu' is silent, so the accent belongs on a later vowel.
      if (before === 'q') { continue; }
      // The u of 'gui' / 'gu`i' belongs to the g; a mark right after it would
      // read as an umlaut.  A 'gu' that is not followed by i is an ordinary
      // vowel and is accented like any other.
      if (before === 'g' && /^`?i/i.test(syllable.substring(i + 1))) {
        continue;
      }
    }
    // Replace a mark that is already attached to this vowel instead of
    // doubling it.
    var rest = i + 1;
    if (syllable.charAt(rest) === '`') { ++rest; }
    return syllable.substring(0, i + 1) + '`' + syllable.substring(rest);
  }
  return syllable;
}

/**
 * Returns the index of the syllable in syllables which should receive emphasis.
 * Does not work for foreign words like "video" that lack proper emphasis.
 *
 * An explicit accent wins.  Otherwise the next to last syllable is chosen when
 * the word ends in a vowel, 'n' or 's', and the last syllable is chosen for
 * any other ending.  A final 'y' counts as a consonant here, so words such as
 * "estoy" are emphasized on the last syllable.
 * @param {Array.<string>} syllables an array of syllables as from
 *     {@link breakSyllables}.
 * @param {int} wordStart the index in syllables of the first syllable of the
 *     word to check.  Must be greater than or equal to 0.
 * @param {int} wordEnd the index in syllables past the last syllable of the
 *     word to check.  Must be strictly greater than wordStart and less than
 *     or equal to the length of syllables.
 * @return {int} an index in [wordStart, wordEnd - 1].
 */
function emphasis(syllables, wordStart, wordEnd) {
  var explicit = explicitEmphasis(syllables, wordStart, wordEnd);
  if (explicit !== null) {
    return explicit;
  }
  var stressesPenultimate = /[aeiouns]$/i.test(syllables[wordEnd - 1]);
  // Clamp so that a one syllable word never yields an index before wordStart.
  return Math.max(wordStart, wordEnd - (stressesPenultimate ? 2 : 1));
}

/**
 * Create a new word by replacing the portion of left after the index rootEnd
 * with the right, preserving the consonant sounds in the left.
 * @param {string} left the original word.
 * @param {int} rootEnd the end of the root in left.  Must occur at a phoneme
 *     boundary, i.e. not in the middle of 'ch' or 'ue' or another multi-char
 *     phoneme.
 * @param {string} right the suffix to add to left[0:rootEnd].  May be empty.
 * @param {boolean} preserveEmphasis true if the same syllable in the root that
 *     has emphasis should be emphasized in the output.  If the suffix has
 *     explicit emphasis then it dominates.
 * @return {string} the respelled root followed by right, with an accent mark
 *     added where needed to keep the emphasis and removed where the default
 *     emphasis already falls on the accented syllable.
 */
function joinWord(left, rootEnd, right, preserveEmphasis) {
  // Normalize spelling if the right starts with a vowel that can change the
  // spelling of the syllable preceding it, or the root used to be followed by
  // such a vowel.
  // '\0' stands for "nothing follows".  It is needed on both sides because
  // 'ei'.indexOf('') is 0, which would make an empty follower look like an
  // 'e' or 'i'.
  var oldFollower = rootEnd < left.length ? left.charAt(rootEnd) : '\0';
  var newFollower = right.length > 0 ? right.charAt(0) : '\0';
  var oldInEI = 'ei'.indexOf(oldFollower) >= 0;
  var newInEI = 'ei'.indexOf(newFollower) >= 0;

  var root = left.substring(0, rootEnd);
  if (oldInEI !== newInEI) {
    if (oldInEI) {
      // When we break before 'e' or 'i' and the suffix starts with neither
      //   ce  ->  za
      //   ge  ->  ja
      root = root.replace(/c$/i, 'z').replace(/g$/i, 'j');
    } else {
      // When we break before not 'e' or 'i' and the suffix starts with one
      //   ja  ->  ge
      //   gua ->  gu`e
      //   ca  ->  que
      //   ga  ->  gue
      //   za  ->  ce
      // The ending is rewritten in a single pass so that the result of one
      // rule (j -> g) is not fed into another (g -> gu).
      root = root.replace(/(?:gu|[cgjz])$/i, function (ending) {
        switch (ending.toLowerCase()) {
          case 'j': return 'g';
          case 'gu': return 'gu`';
          case 'c': return 'qu';
          case 'g': return 'gu';
          default: return 'c';  // 'z'
        }
      });
    }
  }

  var newSyllables = breakSyllables(root + right);
  if (preserveEmphasis) {
    var suffixSyllables = breakSyllables(right);
    if (explicitEmphasis(suffixSyllables, 0, suffixSyllables.length) === null) {
      var rootSyllables = breakSyllables(root);
      var rootEmphasis = emphasis(rootSyllables, 0, rootSyllables.length);
      // The bounds check covers an empty result, where there is no syllable
      // to accent.
      if (rootEmphasis < newSyllables.length
          && emphasis(newSyllables, 0, newSyllables.length) !== rootEmphasis) {
        newSyllables[rootEmphasis] = withEmphasis(newSyllables, rootEmphasis);
      }
    }
  }

  // Remove unnecessary accents: drop the explicit mark, and keep it dropped
  // only if the default rules still pick the same syllable.
  var explEmphasis = explicitEmphasis(newSyllables, 0, newSyllables.length);
  if (explEmphasis !== null) {
    var before = newSyllables[explEmphasis];
    newSyllables[explEmphasis] = withoutEmphasis(newSyllables, explEmphasis);
    var newEmphasis = emphasis(newSyllables, 0, newSyllables.length);
    if (newEmphasis !== explEmphasis) {
      newSyllables[explEmphasis] = before;
    }
  }
  return newSyllables.join('');
}
