import { containsKanji, getCodeKataFromHira, isAnyKana, isHiragana } from 'src/util/kana_conversion'


// Category given to each run of consecutive characters when a name is split into segments:
//  - 'space': ASCII or ideographic spaces
//  - 'comma': ASCII commas
//  - 'kana':  characters recognised by isHiragana / isAnyKana
//  - 'kanji': everything else (catch-all, so it may also hold characters that are not kanji)
type SegmentModeType = 'space' | 'kana' | 'kanji' | 'comma'

// One run of consecutive characters of the same category, as produced while scanning a name.
type WordSegmentType = {
  // the characters of the segment, exactly as they appear in the original text
  text: string
  // category shared by every character of the segment
  mode: SegmentModeType
  // true when mode is 'kanji'
  isKanji: boolean
  // for non-kanji segments: the text to look for in the reading (hiragana converted to katakana,
  // any space stored as an ASCII space); left empty for 'kanji' segments
  katakana: string
}

/**
 * Stores `value` at position `index` of `arr_kana_indices`, where `index` may only be
 * the next free slot or the last slot already filled.
 *
 * - next free slot (`index === length`): the value is appended;
 * - last filled slot (`index === length - 1`): nothing is written, the stored value must
 *   already equal `value`;
 * - any other index is rejected.
 *
 * @throws Error 'setArrayIndex ERROR A' when the last slot holds a different value.
 * @throws Error 'setArrayIndex ERROR B' when `index` is neither the last nor the next slot.
 */
function setArrayIndex(arr_kana_indices: number[], index: number, value: number) {
  const size = arr_kana_indices.length

  // anything other than "last slot" or "next slot" is a caller bug
  if (index !== size && index !== size - 1) {
    throw new Error('setArrayIndex ERROR B')
  }

  if (index === size) {
    arr_kana_indices.push(value)
    return
  }

  // re-setting the last slot is only tolerated when the value does not change
  if (arr_kana_indices[index] !== value) {
    throw new Error('setArrayIndex ERROR A')
  }
}

// export function emptyRuby(text) {
//   eslint-disable-next-line no-irregular-whitespace
//   return <><ruby>　<rt>　</rt></ruby>{text}</>
// }

/**
 * Builds furigana (ruby) markup for a Japanese name by aligning its reading with its kanji.
 *
 * The text is split into runs of spaces, commas, kana and "kanji" (anything else). Every
 * non-kanji run must appear, in order, inside `kana`; whatever lies between two such anchors
 * is taken as the reading of the kanji run between them and rendered as its `<rt>`.
 * Commas in the text must be present in `kana` too, but are left out of the rendered output.
 *
 * @param text Japanese name to annotate; must be trimmed and free of double spaces.
 * @param kana Reading of `text`, matched against katakana; must be trimmed and free of double
 *             spaces (ideographic spaces are treated as ASCII spaces).
 * @returns A pair `[ruby, msg]`. On success `msg` is `null` and `ruby` is either the plain
 *          text (no kanji present) or an array of strings and `<ruby>` elements. On failure
 *          `ruby` is the plain text and `msg` describes the problem.
 */
export function getRuby(text: string, kana: string) {

  // value returned whenever no ruby can (or needs to) be produced
  const NO_RUBY = text

  // first, we check for trimming and double-space issues as these will interfere with proper parsing
  if (text !== text.trim())
    return [NO_RUBY, 'Japanese name not trimmed']
  if (kana !== kana.trim())
    return [NO_RUBY, 'Kana not trimmed']
  if (text.replaceAll('　', ' ').includes('  '))
    return [NO_RUBY, 'Japanese name contains two consecutive spaces']
  if (kana.replaceAll('　', ' ').includes('  '))
    return [NO_RUBY, 'Kana contains two consecutive spaces']

  if (!containsKanji(text)) {
    // no need for any ruby
    return [NO_RUBY, null]
  }

  try {

    // the reading is normalized to only contain non-ideographic spaces
    const kana_normalized = kana.replaceAll('　', ' ')

    // ---- step 1: split the text into runs of characters sharing the same mode ----
    const arr: WordSegmentType[] = []
    let mode: SegmentModeType | null = null

    for (let i = 0; i < text.length; i++) {
      const char = text.charAt(i)
      const code = text.charCodeAt(i)
      let newMode: SegmentModeType
      let katakana = ''
      if (char === ' ' || char === '　') {
        newMode = 'space'
        katakana = ' '
      } else if (char === ',') {
        newMode = 'comma'
        katakana = ','
      } else if (isHiragana(code)) {
        // hiragana is converted so that it can be looked up in the katakana reading
        newMode = 'kana'
        katakana = String.fromCharCode(getCodeKataFromHira(code))
      } else if (isAnyKana(code)) {
        newMode = 'kana'
        katakana = char
      } else {
        // catch-all: kanji, but also any other character not handled above
        newMode = 'kanji'
      }

      if (mode === newMode) {
        // same mode as the previous character: extend the current segment
        const currentItem = arr[arr.length - 1]
        currentItem.text += char
        if (!currentItem.isKanji)
          currentItem.katakana += katakana
      } else {
        const isKanji = newMode === 'kanji'
        arr.push({
          text: char,
          mode: newMode,
          isKanji,
          katakana: isKanji ? '' : katakana,
        })
      }

      mode = newMode

    } // each character


    // ---- step 2: locate every non-kanji segment inside the reading ----
    // arr_kana_indices[i] is the offset in the reading where segment i starts;
    // segment i therefore reads kana_normalized.substring(arr_kana_indices[i], arr_kana_indices[i + 1])
    const arr_kana_indices: number[] = []

    // offset from which the next non-kanji segment is searched
    let kana_current_index = 0

    for (let i = 0; i < arr.length; i++) {
      const item = arr[i]
      if (!item.isKanji) {
        const kana_remaining = kana_normalized.substring(kana_current_index)
        const index = kana_remaining.indexOf(item.katakana)
        if (index === -1) {
          // problem, couldn't find the kana from the kanji name in the furigana
          return [NO_RUBY, `Couldn't find katakana [${item.katakana}] in string [${kana_remaining}]`]
        }
        if (i > 0 && !arr[i - 1].isKanji && index !== 0) {
          // two adjacent non-kanji segments must also be adjacent in the reading; anything in
          // between belongs to no segment (this used to surface as a setArrayIndex exception)
          return [NO_RUBY, `Unexpected kana [${kana_remaining.substring(0, index)}] before [${item.katakana}]`]
        }
        const index_full = kana_current_index + index
        setArrayIndex(arr_kana_indices, i, index_full)
        setArrayIndex(arr_kana_indices, i + 1, index_full + item.katakana.length)

        kana_current_index = index_full + item.katakana.length
      } else if (i === 0) {
        // the text starts with a kanji segment: it starts at offset 0 and uses at least one kana
        arr_kana_indices.push(0)
        kana_current_index = 1
      } else if (containsKanji(item.text)) {
        // a segment holding real kanji is read with at least one kana, so the next anchor cannot
        // start right where this segment starts; without this, a following kana identical to the
        // first kana of the reading would match too early and leave the kanji with an empty reading
        kana_current_index += 1
      }
    }

    // check for excess kana at the end
    if (arr_kana_indices.length >= arr.length + 1) {
      // text does not end in kanji, so we can match the end of text to the end of kana
      if (kana_normalized.length !== arr_kana_indices[arr.length]) {
        // too many kana at the end
        return [NO_RUBY, `Kana should be removed at end of kana string: [${kana_normalized.substring(arr_kana_indices[arr.length])}]`]
      }
    }

    // the last segment always ends where the reading ends
    setArrayIndex(arr_kana_indices, arr.length, kana_normalized.length)

    // ---- step 3: render ----
    const html = []

    for (let i = 0; i < arr.length; i++) {
      const item = arr[i]
      if (item.mode === 'comma')
        continue
      if (!item.isKanji || !containsKanji(item.text)) {
        // kana, spaces and "kanji"-mode segments without any actual kanji are shown as plain text
        html.push(item.text)
      } else {
        html.push(<ruby key={i}>{item.text}<rp>(</rp><rt>{kana_normalized.substring(arr_kana_indices[i], arr_kana_indices[i + 1])}</rt><rp>)</rp></ruby>)
      }
    }

    return [html, null] // no error

  }
  catch (err) {
    // in general, no errors should be thrown, this probably indicates a bug in the algorithm
    console.error(err)
    return [NO_RUBY, `Ruby exception: ${err}`]
  }

}
