/*
 * Decompiled with CFR 0.152.
 */
package edu.cmu.sphinx.linguist.language.ngram.large;

import edu.cmu.sphinx.linguist.language.ngram.large.UnigramProbability;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.Utilities;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Loads the layout of a binary n-gram language model file.
 *
 * <p>The loader parses the file header, the unigram records, the
 * probability / backoff / segment tables and the word strings into memory.
 * The bigram-and-higher n-gram records themselves are <em>not</em> read
 * during layout loading: they are skipped, only their byte offsets are
 * remembered, and they can later be fetched on demand with
 * {@link #loadBuffer(long, int)} when the loader was constructed from a
 * {@link File}.
 *
 * <p>Malformed files are reported by throwing {@link Error}, which is the
 * convention already used throughout this class.
 */
public class BinaryLoader {
    private static final String DARPA_TG_HEADER = "Darpa Trigram LM";
    private static final String DARPA_QG_HEADER = "Darpa Quadrigram LM";
    /** Regular expression matched against headers of the generic "N-gram" form. */
    private static final String DARPA_NG_HEADER = "Darpa \\d-gram LM";
    /**
     * Number of characters in a header that matches {@link #DARPA_NG_HEADER}.
     * The regex escape {@code \d} occupies two characters in the pattern but
     * matches exactly one digit, hence the {@code - 1}.
     */
    private static final int DARPA_NG_HEADER_LENGTH = DARPA_NG_HEADER.length() - 1;
    private static final Pattern DIGIT_PATTERN = Pattern.compile("\\d");
    private static final int LOG2_NGRAM_SEGMENT_SIZE = 9;
    private static final float MIN_PROBABILITY = -99.0f;
    private static final int MAX_PROB_TABLE_SIZE = Integer.MAX_VALUE;

    private LogMath logMath;
    private int maxNGram;
    private float unigramWeight;
    private float languageWeight;
    private double wip;
    private boolean bigEndian = true;
    private boolean applyLanguageWeightAndWip;
    /** Running count of bytes consumed from the layout stream; used to record n-gram offsets. */
    private long bytesRead;
    private UnigramProbability[] unigrams;
    private String[] words;
    /** Byte offset of the n-gram records of each order, indexed by {@code order - 1}. */
    private long[] NGramOffset;
    /** Number of n-grams of each order, indexed by {@code order - 1}. */
    private int[] numberNGrams;
    private int logNGramSegmentSize;
    private int startWordID = -1;
    private int endWordID = -1;
    private int[][] NGramSegmentTable;
    private float[][] NGramProbTable;
    private float[][] NGramBackoffTable;
    private RandomAccessFile file;
    private int bytesPerField;

    /**
     * Creates a loader, parses the layout of the given file and keeps the
     * file open for random access through {@link #loadBuffer(long, int)}.
     *
     * @param location                  the binary language model file
     * @param format                    not used by this class
     * @param applyLanguageWeightAndWip whether language weight and word
     *                                  insertion probability are applied to
     *                                  the loaded probabilities
     * @param languageWeight            the language weight
     * @param wip                       the word insertion probability (linear domain)
     * @param unigramWeight             the unigram weight (linear domain)
     * @throws IOException if the file cannot be opened or read
     */
    public BinaryLoader(File location, String format, boolean applyLanguageWeightAndWip, float languageWeight, double wip, float unigramWeight) throws IOException {
        this(format, applyLanguageWeightAndWip, languageWeight, wip, unigramWeight);
        // loadModelLayout closes the stream it is given, on success and on failure.
        this.loadModelLayout(new FileInputStream(location));
        this.file = new RandomAccessFile(location, "r");
    }

    /**
     * Creates a loader without reading anything. No random-access file is
     * opened by this constructor.
     *
     * @param format                    not used by this class
     * @param applyLanguageWeightAndWip whether language weight and word
     *                                  insertion probability are applied
     * @param languageWeight            the language weight
     * @param wip                       the word insertion probability (linear domain)
     * @param unigramWeight             the unigram weight (linear domain)
     */
    public BinaryLoader(String format, boolean applyLanguageWeightAndWip, float languageWeight, double wip, float unigramWeight) {
        this.applyLanguageWeightAndWip = applyLanguageWeightAndWip;
        this.logMath = LogMath.getLogMath();
        this.languageWeight = languageWeight;
        this.wip = wip;
        this.unigramWeight = unigramWeight;
    }

    /**
     * Closes the random-access file, if one was opened.
     *
     * @throws IOException if closing the file fails
     */
    public void deallocate() throws IOException {
        if (this.file != null) {
            this.file.close();
        }
    }

    /** @return the number of unigrams */
    public int getNumberUnigrams() {
        return this.getNumberNGrams(1);
    }

    /** @return the number of bigrams */
    public int getNumberBigrams() {
        return this.getNumberNGrams(2);
    }

    /** @return the number of trigrams */
    public int getNumberTrigrams() {
        return this.getNumberNGrams(3);
    }

    /**
     * @param n the n-gram order, {@code 1..getMaxDepth()}
     * @return the number of n-grams of order {@code n}
     */
    public int getNumberNGrams(int n) {
        assert (n <= this.maxNGram && n > 0);
        return this.numberNGrams[n - 1];
    }

    /** @return the unigram records read from the file (the internal array, not a copy) */
    public UnigramProbability[] getUnigrams() {
        return this.unigrams;
    }

    /** @return the bigram probability table */
    public float[] getBigramProbabilities() {
        return this.getNGramProbabilities(2);
    }

    /** @return the trigram probability table */
    public float[] getTrigramProbabilities() {
        return this.getNGramProbabilities(3);
    }

    /** @return the trigram backoff weight table */
    public float[] getTrigramBackoffWeights() {
        return this.getNGramBackoffWeights(3);
    }

    /** @return the trigram segment table */
    public int[] getTrigramSegments() {
        return this.getNGramSegments(3);
    }

    /** @return the same value as {@link #getLogNGramSegmentSize()} */
    public int getLogBigramSegmentSize() {
        return this.getLogNGramSegmentSize();
    }

    /**
     * @param n the n-gram order, {@code 2..getMaxDepth()}
     * @return the probability table of order {@code n}; {@code null} if none was loaded
     */
    public float[] getNGramProbabilities(int n) {
        assert (n <= this.maxNGram && n > 1);
        return this.NGramProbTable[n - 1];
    }

    /**
     * @param n the n-gram order, {@code 3..getMaxDepth()}
     * @return the backoff weight table of order {@code n}; {@code null} if none was loaded
     */
    public float[] getNGramBackoffWeights(int n) {
        assert (n <= this.maxNGram && n > 2);
        return this.NGramBackoffTable[n - 1];
    }

    /**
     * @param n the n-gram order, {@code 3..getMaxDepth()}
     * @return the segment table of order {@code n}; {@code null} if none was loaded
     */
    public int[] getNGramSegments(int n) {
        assert (n <= this.maxNGram && n > 2);
        return this.NGramSegmentTable[n - 1];
    }

    /** @return log2 of the n-gram segment size */
    public int getLogNGramSegmentSize() {
        return this.logNGramSegmentSize;
    }

    /** @return the word strings read from the file (the internal array, not a copy) */
    public String[] getWords() {
        return this.words;
    }

    /** @return the byte offset of the bigram records */
    public long getBigramOffset() {
        return this.getNGramOffset(2);
    }

    /** @return the byte offset of the trigram records */
    public long getTrigramOffset() {
        return this.getNGramOffset(3);
    }

    /**
     * @param n the n-gram order, {@code 2..getMaxDepth()}
     * @return the byte offset at which the records of order {@code n} start
     */
    public long getNGramOffset(int n) {
        assert (n <= this.maxNGram && n > 1);
        return this.NGramOffset[n - 1];
    }

    /** @return the highest n-gram order stored in the model */
    public int getMaxDepth() {
        return this.maxNGram;
    }

    /** @return {@code true} if the file was detected as big-endian */
    public boolean getBigEndian() {
        return this.bigEndian;
    }

    /** @return the number of bytes per n-gram record field (2 or 4) */
    public int getBytesPerField() {
        return this.bytesPerField;
    }

    /**
     * Reads {@code size} bytes starting at {@code position} from the
     * random-access file.
     *
     * @param position absolute byte offset in the file
     * @param size     number of bytes to read
     * @return a newly allocated array holding exactly {@code size} bytes
     * @throws IOException if the file ends before {@code size} bytes were
     *                     read, or on any other I/O failure
     */
    public byte[] loadBuffer(long position, int size) throws IOException {
        this.file.seek(position);
        byte[] bytes = new byte[size];
        // A single read() may legitimately return fewer bytes than requested,
        // so keep reading until the buffer is full or the file really ends.
        int filled = 0;
        while (filled < size) {
            int count = this.file.read(bytes, filled, size - filled);
            if (count < 0) {
                throw new IOException("Incorrect number of bytes read. Size = " + size + ". Position =" + position + ".");
            }
            filled += count;
        }
        return bytes;
    }

    /**
     * Parses the model layout from the given stream and then applies the
     * unigram weight and, if enabled, the language weight and word insertion
     * probability. The stream is always closed before this method returns,
     * whether it succeeds or throws.
     *
     * @param inputStream stream positioned at the start of the binary model
     * @throws IOException on read failure or premature end of stream
     */
    protected void loadModelLayout(InputStream inputStream) throws IOException {
        DataInputStream stream = new DataInputStream(new BufferedInputStream(inputStream));
        try {
            this.readHeader(stream);
            // One more unigram record than the declared count is stored in the file.
            this.unigrams = this.readUnigrams(stream, this.numberNGrams[0] + 1, this.bigEndian);
            this.skipNGrams(stream);
            for (int i = 1; i < this.maxNGram; ++i) {
                if (this.numberNGrams[i] <= 0) {
                    continue;
                }
                if (i == 1) {
                    // Bigrams only have a probability table.
                    this.NGramProbTable[i] = this.readFloatTable(stream, this.bigEndian);
                    continue;
                }
                this.NGramBackoffTable[i] = this.readFloatTable(stream, this.bigEndian);
                this.NGramProbTable[i] = this.readFloatTable(stream, this.bigEndian);
                int nMinus1gramSegmentSize = 1 << this.logNGramSegmentSize;
                int NGramSegTableSize = (this.numberNGrams[i - 1] + 1) / nMinus1gramSegmentSize + 1;
                this.NGramSegmentTable[i] = this.readIntTable(stream, this.bigEndian, NGramSegTableSize);
            }
            int wordsStringLength = this.readInt(stream, this.bigEndian);
            if (wordsStringLength <= 0) {
                throw new Error("Bad word string size: " + wordsStringLength);
            }
            this.words = this.readWords(stream, wordsStringLength, this.numberNGrams[0]);
        } finally {
            stream.close();
        }

        if (this.startWordID > -1) {
            this.unigrams[this.startWordID].setLogProbability(MIN_PROBABILITY);
        }
        if (this.endWordID > -1) {
            this.unigrams[this.endWordID].setLogBackoff(MIN_PROBABILITY);
        }
        this.applyUnigramWeight();
        if (this.applyLanguageWeightAndWip) {
            // The tables have maxNGram slots and slot 0 is never populated
            // (unigrams are handled by applyUnigramWeight), so only slots
            // 1..maxNGram-1 are visited; orders without n-grams stay null.
            for (int i = 1; i < this.maxNGram; ++i) {
                float[] probabilities = this.NGramProbTable[i];
                if (probabilities != null) {
                    this.applyLanguageWeight(probabilities, this.languageWeight);
                    this.applyWip(probabilities, this.wip);
                }
                float[] backoffs = this.NGramBackoffTable[i];
                if (i > 1 && backoffs != null) {
                    this.applyLanguageWeight(backoffs, this.languageWeight);
                }
            }
        }
    }

    /**
     * Tells whether the given on-disk header length (which counts the
     * terminating byte) belongs to one of the supported headers.
     */
    private static boolean isKnownHeaderLength(int headerLength) {
        return headerLength == DARPA_TG_HEADER.length() + 1
                || headerLength == DARPA_QG_HEADER.length() + 1
                || headerLength == DARPA_NG_HEADER_LENGTH + 1;
    }

    /**
     * Reads the file header: detects byte order from the header length,
     * determines the maximum n-gram order from the header string, allocates
     * the per-order tables and reads the n-gram counts.
     *
     * @param stream the layout stream, positioned at the start of the file
     * @throws IOException on read failure
     */
    private void readHeader(DataInputStream stream) throws IOException {
        int headerLength = this.readInt(stream, this.bigEndian);
        if (!isKnownHeaderLength(headerLength)) {
            // Not a known length when read big-endian: try the other byte order.
            headerLength = Utilities.swapInteger(headerLength);
            if (!isKnownHeaderLength(headerLength)) {
                throw new Error("Bad binary LM file magic number: " + headerLength + ", not an LM dumpfile?");
            }
            this.bigEndian = false;
        }
        String header = this.readString(stream, headerLength - 1);
        // Consume the byte that terminates the header string.
        stream.readByte();
        ++this.bytesRead;

        if (header.equals(DARPA_TG_HEADER)) {
            this.maxNGram = 3;
        } else if (header.equals(DARPA_QG_HEADER)) {
            this.maxNGram = 4;
        } else if (Pattern.matches(DARPA_NG_HEADER, header)) {
            Matcher m = DIGIT_PATTERN.matcher(header);
            // group() is only valid after a successful match attempt.
            if (!m.find()) {
                throw new Error("Bad binary LM file header: " + header);
            }
            this.maxNGram = Integer.parseInt(m.group());
            // The rest of the loader unconditionally uses the bigram slot.
            if (this.maxNGram < 2) {
                throw new Error("Bad binary LM file header: " + header);
            }
        } else {
            throw new Error("Bad binary LM file header: " + header);
        }

        int fileNameLength = this.readInt(stream, this.bigEndian);
        this.skipStreamBytes(stream, fileNameLength);

        this.numberNGrams = new int[this.maxNGram];
        this.NGramOffset = new long[this.maxNGram];
        this.NGramProbTable = new float[this.maxNGram][];
        this.NGramBackoffTable = new float[this.maxNGram][];
        this.NGramSegmentTable = new int[this.maxNGram][];
        this.numberNGrams[0] = 0;
        this.logNGramSegmentSize = LOG2_NGRAM_SEGMENT_SIZE;

        int version = this.readInt(stream, this.bigEndian);
        this.bytesPerField = 2;
        if (version <= 0) {
            // Versioned file: one int is read and discarded, then a list of
            // length-prefixed strings terminated by a zero length is skipped.
            this.readInt(stream, this.bigEndian);
            if (version <= -3) {
                this.bytesPerField = 4;
            }
            int formatLength;
            while ((formatLength = this.readInt(stream, this.bigEndian)) != 0) {
                this.skipStreamBytes(stream, formatLength);
            }
            if (version == -2) {
                this.logNGramSegmentSize = this.readInt(stream, this.bigEndian);
                if (this.logNGramSegmentSize < 1 || this.logNGramSegmentSize > 15) {
                    throw new Error("log2(bg_seg_sz) outside range 1..15");
                }
            }
            this.numberNGrams[0] = this.readInt(stream, this.bigEndian);
        } else {
            // Unversioned file: the value just read is the unigram count itself.
            this.numberNGrams[0] = version;
        }
        if (this.numberNGrams[0] <= 0) {
            throw new Error("Bad number of unigrams: " + this.numberNGrams[0] + ", must be > 0.");
        }
        for (int i = 1; i < this.maxNGram; ++i) {
            this.numberNGrams[i] = this.readInt(stream, this.bigEndian);
            if (this.numberNGrams[i] < 0) {
                // Index i holds the count for order i + 1.
                throw new Error("Bad number of " + String.valueOf(i + 1) + "-grams: " + this.numberNGrams[i]);
            }
        }
    }

    /**
     * Skips over the n-gram records of every order above unigrams, recording
     * in {@link #NGramOffset} the byte position at which each order starts.
     *
     * @param stream the layout stream, positioned just after the unigrams
     * @throws IOException on read failure or premature end of stream
     */
    private void skipNGrams(DataInputStream stream) throws IOException {
        long fieldBytes = this.getBytesPerField();

        this.NGramOffset[1] = this.bytesRead;
        // Widen before multiplying so large models cannot overflow int.
        long bytesToSkip = ((long) this.numberNGrams[1] + 1L) * 4L * fieldBytes;
        this.skipStreamBytes(stream, bytesToSkip);

        for (int i = 2; i < this.maxNGram; ++i) {
            if (this.numberNGrams[i] <= 0) {
                continue;
            }
            this.NGramOffset[i] = this.bytesRead;
            if (i < this.maxNGram - 1) {
                // Intermediate orders: (count + 1) records of 4 fields each.
                bytesToSkip = ((long) this.numberNGrams[i] + 1L) * 4L * fieldBytes;
            } else {
                // Highest order: count records of 2 fields each.
                bytesToSkip = (long) this.numberNGrams[i] * 2L * fieldBytes;
            }
            this.skipStreamBytes(stream, bytesToSkip);
        }
    }

    /**
     * Skips exactly {@code bytes} bytes of the stream and adds them to
     * {@link #bytesRead}. Non-positive counts are a no-op.
     *
     * @param stream the stream to advance
     * @param bytes  number of bytes to skip
     * @throws IOException if the stream ends first, or on read failure
     */
    private void skipStreamBytes(DataInputStream stream, long bytes) throws IOException {
        while (bytes > 0L) {
            long skipped = stream.skip(bytes);
            if (skipped <= 0L) {
                // skip() may return 0 without being at end of stream; a
                // single-byte read tells the two cases apart and prevents
                // spinning forever on a truncated file.
                if (stream.read() < 0) {
                    throw new IOException("Unexpected end of stream: " + bytes + " bytes left to skip");
                }
                skipped = 1L;
            }
            this.bytesRead += skipped;
            bytes -= skipped;
        }
    }

    /**
     * Interpolates every unigram probability (except the start word's) with
     * a uniform distribution using the unigram weight, and, if enabled,
     * applies the language weight and word insertion probability to the
     * unigram probabilities and the language weight to their backoffs.
     */
    private void applyUnigramWeight() {
        float logUnigramWeight = this.logMath.linearToLog(this.unigramWeight);
        float logNotUnigramWeight = this.logMath.linearToLog(1.0f - this.unigramWeight);
        float logUniform = this.logMath.linearToLog(1.0f / (float) this.numberNGrams[0]);
        float logWip = this.logMath.linearToLog(this.wip);
        // Log of the uniform share: (1 - unigramWeight) / numberOfUnigrams.
        float p2 = logUniform + logNotUnigramWeight;
        for (int i = 0; i < this.numberNGrams[0]; ++i) {
            UnigramProbability unigram = this.unigrams[i];
            float p1 = unigram.getLogProbability();
            if (i != this.startWordID) {
                p1 += logUnigramWeight;
                p1 = this.logMath.addAsLinear(p1, p2);
            }
            if (this.applyLanguageWeightAndWip) {
                p1 = p1 * this.languageWeight + logWip;
                unigram.setLogBackoff(unigram.getLogBackoff() * this.languageWeight);
            }
            unigram.setLogProbability(p1);
        }
    }

    /**
     * Multiplies every entry of the table by the language weight, in place.
     *
     * @param logProbabilities table to scale
     * @param languageWeight   the factor
     */
    private void applyLanguageWeight(float[] logProbabilities, float languageWeight) {
        for (int i = 0; i < logProbabilities.length; ++i) {
            logProbabilities[i] = logProbabilities[i] * languageWeight;
        }
    }

    /**
     * Adds the log of the word insertion probability to every entry of the
     * table, in place.
     *
     * @param logProbabilities table to adjust
     * @param wip              word insertion probability (linear domain)
     */
    private void applyWip(float[] logProbabilities, double wip) {
        float logWip = this.logMath.linearToLog(wip);
        for (int i = 0; i < logProbabilities.length; ++i) {
            logProbabilities[i] = logProbabilities[i] + logWip;
        }
    }

    /**
     * Reads a length-prefixed table of floats and converts each value from
     * log10 to the {@link LogMath} log domain.
     *
     * @param stream    the layout stream
     * @param bigEndian byte order of the stream
     * @return the converted table
     * @throws IOException on read failure
     */
    private float[] readFloatTable(DataInputStream stream, boolean bigEndian) throws IOException {
        int numProbs = this.readInt(stream, bigEndian);
        if (numProbs <= 0 || numProbs > MAX_PROB_TABLE_SIZE) {
            throw new Error("Bad probabilities table size: " + numProbs);
        }
        float[] probTable = new float[numProbs];
        for (int i = 0; i < numProbs; ++i) {
            probTable[i] = this.logMath.log10ToLog(this.readFloat(stream, bigEndian));
        }
        return probTable;
    }

    /**
     * Reads a length-prefixed table of ints whose stored length must equal
     * {@code tableSize}.
     *
     * @param stream    the layout stream
     * @param bigEndian byte order of the stream
     * @param tableSize the expected number of entries
     * @return the table
     * @throws IOException on read failure
     */
    private int[] readIntTable(DataInputStream stream, boolean bigEndian, int tableSize) throws IOException {
        int numSegments = this.readInt(stream, bigEndian);
        if (numSegments != tableSize) {
            throw new Error("Bad NGram seg table size: " + numSegments);
        }
        int[] segmentTable = new int[numSegments];
        for (int i = 0; i < numSegments; ++i) {
            segmentTable[i] = this.readInt(stream, bigEndian);
        }
        return segmentTable;
    }

    /**
     * Reads {@code numberUnigrams} unigram records. Each record consists of
     * an id, a log10 probability, a log10 backoff and the index of the first
     * bigram entry; probability and backoff are converted to the
     * {@link LogMath} log domain.
     *
     * @param stream         the layout stream
     * @param numberUnigrams number of records to read
     * @param bigEndian      byte order of the stream
     * @return the unigram records in file order
     * @throws IOException on read failure
     */
    private UnigramProbability[] readUnigrams(DataInputStream stream, int numberUnigrams, boolean bigEndian) throws IOException {
        UnigramProbability[] unigrams = new UnigramProbability[numberUnigrams];
        for (int i = 0; i < numberUnigrams; ++i) {
            int unigramID = this.readInt(stream, bigEndian);
            // Ids below 1 are replaced by the record's position.
            if (unigramID < 1) {
                unigramID = i;
            }
            // The last record is exempt from the id == position check.
            if (i != numberUnigrams - 1) {
                assert (unigramID == i);
            }
            float unigramProbability = this.readFloat(stream, bigEndian);
            float unigramBackoff = this.readFloat(stream, bigEndian);
            int firstBigramEntry = this.readInt(stream, bigEndian);
            float logProbability = this.logMath.log10ToLog(unigramProbability);
            float logBackoff = this.logMath.log10ToLog(unigramBackoff);
            unigrams[i] = new UnigramProbability(unigramID, logProbability, logBackoff, firstBigramEntry);
        }
        return unigrams;
    }

    /**
     * Reads a 4-byte int in the given byte order and advances
     * {@link #bytesRead} by 4.
     */
    private int readInt(DataInputStream stream, boolean bigEndian) throws IOException {
        this.bytesRead += 4L;
        if (bigEndian) {
            return stream.readInt();
        }
        return Utilities.readLittleEndianInt(stream);
    }

    /**
     * Reads a 4-byte float in the given byte order and advances
     * {@link #bytesRead} by 4.
     */
    private float readFloat(DataInputStream stream, boolean bigEndian) throws IOException {
        this.bytesRead += 4L;
        if (bigEndian) {
            return stream.readFloat();
        }
        return Utilities.readLittleEndianFloat(stream);
    }

    /**
     * Reads exactly {@code length} bytes and returns them as a string, one
     * character per byte.
     *
     * @param stream the layout stream
     * @param length number of bytes to read
     * @return the string
     * @throws IOException if the stream ends early, or on read failure
     */
    private String readString(DataInputStream stream, int length) throws IOException {
        byte[] bytes = new byte[length];
        // readFully guarantees the whole buffer is filled (or throws), unlike
        // read(byte[]) which may return a short count or -1.
        stream.readFully(bytes);
        this.bytesRead += length;
        StringBuilder builder = new StringBuilder(length);
        for (int i = 0; i < length; ++i) {
            builder.append((char) bytes[i]);
        }
        return builder.toString();
    }

    /**
     * Reads the block of zero-terminated word strings and records the ids
     * of the {@code <s>} and {@code </s>} words when they are present.
     *
     * @param stream         the layout stream
     * @param length         total size in bytes of the word string block
     * @param numberUnigrams the expected number of words
     * @return the words in file order
     * @throws IOException if the stream ends early, or on read failure
     */
    private String[] readWords(DataInputStream stream, int length, int numberUnigrams) throws IOException {
        String[] words = new String[numberUnigrams];
        byte[] bytes = new byte[length];
        stream.readFully(bytes);
        // Count the block exactly once.
        this.bytesRead += length;

        int s = 0;
        int wordStart = 0;
        for (int i = 0; i < length; ++i) {
            if (bytes[i] != 0) {
                continue;
            }
            if (s >= numberUnigrams) {
                throw new Error("Bad word string: more than " + numberUnigrams + " words");
            }
            words[s] = new String(bytes, wordStart, i - wordStart);
            wordStart = i + 1;
            if (words[s].equals("<s>")) {
                this.startWordID = s;
            } else if (words[s].equals("</s>")) {
                this.endWordID = s;
            }
            ++s;
        }
        assert (s == numberUnigrams);
        return words;
    }
}

