/**
 * Validate the text typed by the user and extract the variant identifiers it contains.
 *
 * The input is first screened with a word-count heuristic to reject questions /
 * free text. It is then tokenized and scanned for: genomic coordinates
 * (assembly + chromosome + `g.` position or position/ref/alt), HGVS `c.` / `p.` /
 * `m.` expressions, an rs identifier, a transcript identifier and a gene symbol.
 *
 * @param {string} strings_array_in - Raw user input. `null`/`undefined` are treated
 *   as an empty string; any other non-string value is converted with `String()`.
 * @returns {{val: true, text: {gene: string, rs: string, hgvs_c: string, hgvs_p: string, hgvs_m: string, transcript: string, genomic_coord: string}}
 *   | {val: false, status: number, text: string}}
 *   On success `text` holds the extracted fields (empty string when a field is absent).
 *   On failure `text` is the message to show and `status` is -1 (unsupported input),
 *   -1.6 (incomplete genomic coordinates) or -1.8 (HGVS without gene/transcript).
 */
export const validateVariant = function (strings_array_in) {
    // First token whose upper-cased form matches `pattern`, or undefined when there is none.
    const findToken = (list, pattern) => list.find((str) => pattern.test(str.toUpperCase()));
    // HGVS-style tokens are returned with only their prefix letter lower-cased ("C.123A>G" -> "c.123A>G").
    const lowerFirst = (str) => str.charAt(0).toLowerCase() + str.slice(1);

    // Missing input behaves like an empty string instead of throwing on `.split`.
    const rawInput = strings_array_in == null ? '' : String(strings_array_in);

    // --- Free-text / question screening -------------------------------------
    let wordsArray = rawInput.split(/[\s,']/).filter((str) => str !== "");
    // A last word ending with "?", "!" or "." is left out of the word statistics.
    if (/[?!.]$/.test(wordsArray[wordsArray.length - 1])) {
        wordsArray = wordsArray.slice(0, wordsArray.length - 1);
    }
    const onlyTextWords = wordsArray.filter((str) => /^[a-zA-Z]+$/.test(str));

    // The input is considered a sentence (and rejected) when it has 8 or more words,
    // or 6 or more letter-only words, or more than 2 words that are all letter-only.
    const looksLikeSentence = wordsArray.length >= 8
        || onlyTextWords.length >= 6
        || (wordsArray.length === onlyTextWords.length && wordsArray.length > 2);
    if (looksLikeSentence) {
        return {
            val: false,
            status: -1,
            text: 'Currently, VarChat is not equipped to answer questions.  Please insert a variant in a supported format or a gene symbol.'
        };
    }

    // --- Tokenization -------------------------------------------------------
    // Protein identifiers (NP_..., ENSP...) are dropped right away.
    let tokens = rawInput
        .split(/[:\s\t(),;]/)
        .filter((str) => str !== "" && !str.toUpperCase().startsWith('NP_') && !/^ENSP(\d){4,}/.test(str.toUpperCase()));

    // --- Genomic coordinates ------------------------------------------------
    // Every match is collected (not just the first) so that a repeated token is
    // still excluded from the gene lookup further down.
    const assemblyArray = tokens.filter((str) => /^(GRCH37|GRCH38|HG19|37|38)$/.test(str.toUpperCase()));
    const assembly = assemblyArray[0] !== undefined
        ? assemblyArray[0].toUpperCase().replace('GRCH', '').replace('HG19', '37')
        : '';

    const chromArray = tokens.filter((str) => /^([1]\d|[1-9]|2[0-2]|[X]|[Y]|(MT)|M|CHR([1]\d|[1-9]|2[0-2]|[X]|[Y]|(MT)|M))$/.test(str.toUpperCase()));
    const firstChrom = chromArray[0];
    let chrom = '';
    if (firstChrom !== undefined) {
        const bareChrom = firstChrom.toUpperCase().replace('CHR', '');
        // The mitochondrial chromosome is always emitted as "MT" (per the original note, the backend expects it).
        chrom = bareChrom === 'M' ? 'MT' : bareChrom;
    }
    const chromIndex = firstChrom !== undefined ? tokens.indexOf(firstChrom) : 0;

    const gToken = findToken(tokens, /^G\.[a-zA-Z0-9_><]+/);
    const gPosition = gToken !== undefined ? lowerFirst(gToken) : '';

    // Candidate position / ref / alt tokens: taken from the chromosome onwards,
    // excluding the assembly and chromosome tokens themselves.
    const notAssemblyOrChrom = (str) => !assemblyArray.includes(str) && !chromArray.includes(str);
    const positionAltRefArray = tokens
        .slice(chromIndex)
        .filter((str) => notAssemblyOrChrom(str) && /^[ATCG0-9\-]+$/.test(str.toUpperCase()));
    const leadingCoords = positionAltRefArray.slice(0, 3);

    let genomic_coord = '';
    let genomicBool = false;

    // With assembly + chromosome, or with a mitochondrial chromosome, the variant is genomic.
    const isMito = /^((MT)|M|CHR((MT)|M))$/.test(chrom.toUpperCase());
    if ((assembly !== '' || isMito) && chrom !== '') {
        if (gPosition !== '') {
            genomic_coord = [assembly, chrom, gPosition].filter((str) => str !== '').join(':');
            tokens = tokens.filter(notAssemblyOrChrom);
        } else if (positionAltRefArray.length >= 3) {
            // The first three candidates are used as position, ref and alt.
            genomic_coord = [assembly, chrom, leadingCoords.join(':')].filter((str) => str !== '').join(':');

            tokens = tokens.filter(notAssemblyOrChrom);
            // Remove one occurrence of each consumed coordinate token. They are always
            // present here because they were never assembly/chromosome tokens.
            for (const coord of leadingCoords) {
                tokens.splice(tokens.indexOf(coord), 1);
            }
        }
    } else if (
        tokens.length > 3
        && firstChrom !== undefined
        // Without this guard a chromosome-like token in last position compared two empty
        // slices ('' === '') and wrongly flagged the input as genomic coordinates.
        && leadingCoords.length > 0
        && tokens.slice(chromIndex + 1, chromIndex + 4).toString() === leadingCoords.toString()
    ) {
        // Coordinates directly follow the chromosome but no assembly was given: remember it so
        // that, if no other valid pattern is found, a dedicated genomic-coordinates error is shown.
        genomicBool = true;
        tokens = tokens.filter((str) => !leadingCoords.includes(str));
    }

    // --- HGVS ---------------------------------------------------------------
    const cToken = findToken(tokens, /^C\.(?=.*[a-zA-Z0-9])\S{3,}$/);
    const hgvs_c = cToken !== undefined ? lowerFirst(cToken) : '';

    const pToken = findToken(tokens, /^P\.(?=.*[a-zA-Z0-9])\S{3,}$/);
    const hgvs_p = pToken !== undefined ? lowerFirst(pToken) : '';

    // c. and m. cannot be used together (corner case): when a c. is present, m. stays empty.
    const mToken = hgvs_c === '' ? findToken(tokens, /^M\.(?=.*[a-zA-Z0-9])\S{3,}$/) : undefined;
    const hgvs_m = mToken !== undefined ? lowerFirst(mToken) : '';

    const hgvs_val_check = (hgvs_m !== '' || hgvs_c !== '' || hgvs_p !== '');

    // --- rs identifier ------------------------------------------------------
    const rsArray = tokens.filter((str) => /^(RS)[\d.]{3,12}$/.test(str.toUpperCase()));
    const rs = rsArray[0] !== undefined ? rsArray[0] : '';

    // --- Transcript ---------------------------------------------------------
    // Note: tokens starting with NP_ were already dropped during tokenization, so the
    // NP_ alternative below cannot match at this point.
    const transcriptArray = tokens.filter((str) => /^(ENST)[\d]{9,11}(\.\d+)?$/.test(str.toUpperCase()) || /^(NM_|NP_)[\d]{4,10}(\.\d+)?$/.test(str.toUpperCase()));
    // An Ensembl transcript takes precedence over a RefSeq one.
    const ensemblTranscript = findToken(tokens, /^(ENST)[\d]{9,11}(\.\d+)?$/);
    const refseqTranscript = findToken(tokens, /^(NM_|NP_)[\d]{4,10}(\.\d+)?$/);
    let transcript = '';
    if (ensemblTranscript !== undefined) {
        transcript = ensemblTranscript;
    } else if (refseqTranscript !== undefined) {
        transcript = refseqTranscript;
    }

    // --- Gene ---------------------------------------------------------------
    // First remaining token that looks like a symbol (at least two alphanumerics with at
    // least one letter, optional single inner hyphen, fewer than 33 characters) and
    // was not already classified as rs, transcript, assembly or chromosome.
    const geneToken = tokens.find((x) => !rsArray.includes(x)
        && !transcriptArray.includes(x)
        && /^(?=.*[A-Za-z])[A-Za-z0-9]+-?[A-Za-z0-9]+$/.test(x)
        && x.length < 33
        && !assemblyArray.includes(x)
        && !chromArray.includes(x));
    const gene = geneToken !== undefined ? geneToken : '';

    // --- Outcome ------------------------------------------------------------
    if (genomicBool && gene !== '' && !hgvs_val_check && rs === '') {
        return {
            val: false,
            status: -1.6,
            text: 'It seems you’re trying to query a variant by genomic coordinates, but some information is missing. Please check the format and remember to specify the genomic assembly.'
        };
    }

    if (rs !== '' || gene !== '' || (hgvs_val_check && transcript !== '') || genomic_coord !== '') {
        return { text: { gene: gene, rs: rs, hgvs_c: hgvs_c, hgvs_p: hgvs_p, hgvs_m: hgvs_m, transcript: transcript, genomic_coord: genomic_coord }, val: true };
    }

    // A valid HGVS expression without a transcript or a gene gets its own message.
    if (hgvs_val_check) {
        return {
            val: false,
            status: -1.8,
            text: 'The HGVS coordinates you provided appear to be incomplete. Please specify gene symbols or transcript, or try another VarChat-supported format.'
        };
    }

    if (gPosition !== '' || genomicBool) {
        return {
            val: false,
            status: -1.6,
            text: 'It seems you’re trying to query a variant by genomic coordinates, but some information is missing. Please check the format and remember to specify the genomic assembly.'
        };
    }

    return {
        val: false,
        status: -1,
        text: 'VarChat can’t handle this type of question. For best results, please enter variants or gene symbols using the suggested format.'
    };
}