/*
 * Decompiled with CFR 0.152.
 */
package ch.systemsx.cisd.common.fasta;

import ch.systemsx.cisd.common.fasta.SequenceType;
import ch.systemsx.cisd.common.utilities.Counters;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * Static helpers for guessing whether a line of sequence data is a nucleic acid
 * sequence ({@link SequenceType#NUCL}) or a protein sequence ({@link SequenceType#PROT}).
 *
 * <p>Two classifiers are offered: a strict one that may answer {@code null}
 * ({@link #determineSequenceTypeOrNull(String)}) and a heuristic one that always
 * answers ({@link #determineSequenceType(String)}). Both compare characters
 * case-insensitively; upper-casing is done with {@link Locale#ROOT} so the result
 * does not depend on the JVM default locale.
 */
public class FastaUtilities {
    /** Characters treated as nucleic acid codes by {@link #determineSequenceType(String)}. */
    public static final List<Character> NUCLEIC_ACID_CODES = Arrays.asList(
            'A', 'C', 'G', 'T', 'U', 'R', 'Y', 'K', 'M', 'S', 'W', 'B', 'D', 'H', 'V', 'N', 'X', '-');

    /** Characters treated as nucleic acid codes by {@link #determineSequenceTypeOrNull(String)}. */
    public static final List<Character> STRICT_NUCLEIC_ACID_CODES = Arrays.asList('A', 'T', 'U', 'C', 'G');

    /** Characters treated as amino acid codes by {@link #determineSequenceType(String)}. */
    public static final List<Character> AMINO_ACID_CODES = Arrays.asList(
            'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
            'S', 'T', 'U', 'V', 'W', 'Y', 'Z', 'X', '*', '-');

    /** Characters treated as amino acid codes by {@link #determineSequenceTypeOrNull(String)}. */
    public static final List<Character> STRICT_AMINO_ACID_CODES = Arrays.asList(
            'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V');

    // Hash-set copies of the public lists, used for constant-time membership tests.
    private static final Set<Character> NUCLEIC_ACID_CODES_SET = new HashSet<Character>(NUCLEIC_ACID_CODES);
    private static final Set<Character> STRICT_NUCLEIC_ACID_CODES_SET = new HashSet<Character>(STRICT_NUCLEIC_ACID_CODES);
    private static final Set<Character> AMINO_ACID_CODES_SET = new HashSet<Character>(AMINO_ACID_CODES);
    private static final Set<Character> STRICT_AMINO_ACID_CODES_SET = new HashSet<Character>(STRICT_AMINO_ACID_CODES);

    /** Codes whose presence alone does not speak against a nucleic acid sequence. */
    private static final String COMMON_NUCLEIC_ACID_CODES = "ACGTUN-";

    /** A line with more distinct codes than this is classified as protein. */
    private static final int MAX_DISTINCT_NUCLEIC_ACID_CODES = 6;

    /**
     * Strictly classifies a line using only {@link #STRICT_NUCLEIC_ACID_CODES} and
     * {@link #STRICT_AMINO_ACID_CODES}.
     *
     * <ul>
     * <li>Any character outside both strict sets yields {@code null}.</li>
     * <li>{@link SequenceType#PROT} is returned when every character is a strict amino
     * acid code and at least one of them is not also a strict nucleic acid code.</li>
     * <li>{@link SequenceType#NUCL} is returned when every character is a strict nucleic
     * acid code. This includes the empty string, for which no character is inspected.</li>
     * <li>Everything else (e.g. a mix of nucleic-only and amino-only codes) yields
     * {@code null}.</li>
     * </ul>
     *
     * @param line the sequence line to inspect; must not be {@code null}
     * @return the detected type, or {@code null} if the line is ambiguous or contains
     *         an unrecognized character
     * @throws NullPointerException if {@code line} is {@code null}
     */
    public static SequenceType determineSequenceTypeOrNull(String line) {
        boolean hasAminoAcidOnlyCode = false;
        int nucleicAcidCount = 0;
        int aminoAcidCount = 0;
        // Locale.ROOT: the default-locale variant maps 'i' to U+0130 under Turkish/Azeri
        // locales, which would make valid lower-case sequences unrecognizable.
        for (char c : line.toUpperCase(Locale.ROOT).toCharArray()) {
            boolean isNucleicAcidCode = STRICT_NUCLEIC_ACID_CODES_SET.contains(c);
            boolean isAminoAcidCode = STRICT_AMINO_ACID_CODES_SET.contains(c);
            if (!isNucleicAcidCode && !isAminoAcidCode) {
                return null;
            }
            if (isNucleicAcidCode) {
                ++nucleicAcidCount;
            }
            if (isAminoAcidCode) {
                ++aminoAcidCount;
                if (!isNucleicAcidCode) {
                    // A code that can only be an amino acid is the evidence needed for PROT.
                    hasAminoAcidOnlyCode = true;
                }
            }
        }
        if (aminoAcidCount == line.length() && hasAminoAcidOnlyCode) {
            return SequenceType.PROT;
        }
        return nucleicAcidCount == line.length() ? SequenceType.NUCL : null;
    }

    /**
     * Heuristically classifies a line using {@link #NUCLEIC_ACID_CODES} and
     * {@link #AMINO_ACID_CODES}; never returns {@code null}.
     *
     * <p>Characters belonging to neither set are ignored. The line is classified as
     * {@link SequenceType#PROT} when any of the following holds:
     * <ul>
     * <li>a character is an amino acid code but not a nucleic acid code,</li>
     * <li>more than six distinct nucleic acid codes occur,</li>
     * <li>both {@code T} and {@code U} occur,</li>
     * <li>a nucleic acid code other than one of {@code A C G T U N -} occurs.</li>
     * </ul>
     * Otherwise the result is {@link SequenceType#NUCL}.
     *
     * @param line the sequence line to inspect; must not be {@code null}
     * @return the guessed sequence type
     * @throws NullPointerException if {@code line} is {@code null}
     */
    public static SequenceType determineSequenceType(String line) {
        Counters<Character> counters = new Counters<Character>();
        int sequenceCharacters = 0;
        // Locale.ROOT keeps the case mapping independent of the JVM default locale.
        for (char c : line.toUpperCase(Locale.ROOT).toCharArray()) {
            if (!NUCLEIC_ACID_CODES_SET.contains(c)) {
                if (AMINO_ACID_CODES_SET.contains(c)) {
                    // Amino-acid-only code: cannot be a nucleic acid sequence.
                    return SequenceType.PROT;
                }
                // Not a sequence code at all; skip it.
                continue;
            }
            ++sequenceCharacters;
            counters.count(Character.valueOf(c));
        }
        if (counters.getNumberOfDifferentObjectsCounted() > MAX_DISTINCT_NUCLEIC_ACID_CODES
                || FastaUtilities.containsUAndT(counters)) {
            return SequenceType.PROT;
        }
        // Whatever remains after subtracting the common codes are sites with uncommon codes.
        int nonCommonNucleicAcidCodeSites = sequenceCharacters;
        for (char code : COMMON_NUCLEIC_ACID_CODES.toCharArray()) {
            nonCommonNucleicAcidCodeSites -= counters.getCountOf(Character.valueOf(code));
        }
        return nonCommonNucleicAcidCodeSites == 0 ? SequenceType.NUCL : SequenceType.PROT;
    }

    /** Returns {@code true} if both {@code T} and {@code U} have been counted at least once. */
    private static boolean containsUAndT(Counters<Character> counters) {
        return counters.getCountOf(Character.valueOf('T')) > 0 && counters.getCountOf(Character.valueOf('U')) > 0;
    }
}

