/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.transform;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.lops.LopProperties;
import org.apache.sysml.parser.Expression;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.InstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction;
import org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction;
import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.matrix.CSVReblockMR;
import org.apache.sysml.runtime.matrix.JobReturn;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.runtime.transform.ApplyTfBBMR;
import org.apache.sysml.runtime.transform.ApplyTfCSVMR;
import org.apache.sysml.runtime.transform.ApplyTfCSVSPARK;
import org.apache.sysml.runtime.transform.BinAgent;
import org.apache.sysml.runtime.transform.DummycodeAgent;
import org.apache.sysml.runtime.transform.GenTfMtdMR;
import org.apache.sysml.runtime.transform.GenTfMtdSPARK;
import org.apache.sysml.runtime.transform.MVImputeAgent;
import org.apache.sysml.runtime.transform.OmitAgent;
import org.apache.sysml.runtime.transform.RecodeAgent;
import org.apache.sysml.runtime.transform.TfUtils;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.utils.JSONHelper;
import org.apache.wink.json4j.JSONArray;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;
import scala.Tuple2;

public class DataTransform {
    /** Message of the {@link DMLRuntimeException} raised by {@code mrDataTransform} when a job reports zero rows. */
    private static final String ERROR_MSG_ZERO_ROWS = "Number of rows in the transformed output (potentially, after ommitting the ones with missing values) is zero. Cannot proceed.";

    /**
     * Returns the header line describing the columns of a CSV input.
     *
     * <p>The first line of {@code smallestFile} is read. If the CSV properties declare a
     * header, that line is returned unchanged (it is {@code null} when the file is empty).
     * Otherwise the line is only used to count columns and a synthetic header
     * {@code V1<delim>V2<delim>...} is generated.
     *
     * @param fs           file system used to open {@code smallestFile}
     * @param prop         CSV properties supplying the delimiter and the header flag
     * @param smallestFile path of the file whose first line is inspected
     * @return the header line, real or generated
     * @throws IOException if the file cannot be read, or if it is empty and a synthetic
     *                     header would have to be derived from it
     */
    private static String readHeaderLine(FileSystem fs, CSVFileFormatProperties prop, String smallestFile) throws IOException {
        String firstLine;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(smallestFile))))) {
            firstLine = br.readLine();
        }
        if (prop.hasHeader()) {
            return firstLine;
        }
        // Without a first line there is nothing to count columns from; fail with a
        // descriptive error instead of a NullPointerException inside Pattern.split.
        if (firstLine == null) {
            throw new IOException("Unable to generate default column names: input file " + smallestFile + " is empty.");
        }
        String delim = prop.getDelim();
        // limit -1 keeps trailing empty fields so they still count as columns
        int ncol = Pattern.compile(Pattern.quote(delim)).split(firstLine, -1).length;
        StringBuilder sb = new StringBuilder("V1");
        for (int i = 2; i <= ncol; ++i) {
            sb.append(delim).append('V').append(i);
        }
        return sb.toString();
    }

    /**
     * Builds a lookup from column name to 1-based column position.
     *
     * <p>The header is split on the literal delimiter from {@code prop}; each field is
     * trimmed and passed through {@code UtilFunctions.unquote} before being used as key.
     * If the same key occurs more than once, the later position replaces the earlier one.
     *
     * @param fs           unused
     * @param prop         CSV properties supplying the delimiter
     * @param headerLine   header line to split
     * @param smallestFile unused
     * @return map from column name to 1-based column index
     */
    private static HashMap<String, Integer> processColumnNames(FileSystem fs, CSVFileFormatProperties prop, String headerLine, String smallestFile) throws IllegalArgumentException, IOException {
        // limit -1 keeps trailing empty fields
        String[] fields = Pattern.compile(Pattern.quote(prop.getDelim())).split(headerLine, -1);
        HashMap<String, Integer> nameToId = new HashMap<String, Integer>();
        int position = 0;
        for (String field : fields) {
            ++position;
            nameToId.put(UtilFunctions.unquote(field.trim()), position);
        }
        return nameToId;
    }

    /**
     * Reorders the parallel arrays {@code list}, {@code mthd} and (if non-null) {@code cst}
     * in place so that position {@code p} ends up holding the element that was originally at
     * position {@code indices[p]}.
     *
     * <p>The permutation is applied cycle by cycle. As a side effect {@code indices} is
     * overwritten with the identity permutation, which is also what marks a position as
     * already placed.
     *
     * @param list    primary array to permute
     * @param mthd    array permuted in lock-step with {@code list}
     * @param cst     optional array permuted in lock-step with {@code list}; may be {@code null}
     * @param indices source position for every target position; reset to identity on return
     */
    private static void inplacePermute(int[] list, byte[] mthd, Object[] cst, Integer[] indices) {
        final boolean hasCst = cst != null;
        for (int start = 0; start < list.length; ++start) {
            // remember the element that the cycle starting here will overwrite first
            final int heldId = list[start];
            final byte heldMethod = mthd[start];
            final Object heldCst = hasCst ? cst[start] : null;

            int target = start;
            int source = indices[target];
            indices[target] = target;
            // walk the cycle, pulling each element into its target slot
            while (source != start) {
                list[target] = list[source];
                mthd[target] = mthd[source];
                if (hasCst) {
                    cst[target] = cst[source];
                }
                target = source;
                source = indices[target];
                indices[target] = target;
            }
            // close the cycle with the element saved at the beginning
            list[target] = heldId;
            mthd[target] = heldMethod;
            if (hasCst) {
                cst[target] = heldCst;
            }
        }
    }

    /**
     * Converts a user-supplied transformation specification into the internal, ID-based
     * specification and validates that the requested transformations are compatible.
     *
     * <p>The input JSON may contain the sections {@code omit}, {@code impute}, {@code recode},
     * {@code bin}, {@code dummycode} and {@code scale}. Columns are referenced by name, or by
     * 1-based position when the top-level key {@code "ids"} is {@code true}. For every section
     * present, the output JSON holds an object with a sorted {@code "attributes"} array of
     * column IDs plus, where applicable, parallel {@code "methods"}, {@code "constants"} or
     * {@code "numbins"} arrays. Method codes written to the output are:
     * impute 1=global_mean, 2=global_mode, 3=constant; bin 1=equi-width;
     * scale 1=mean-subtraction, 2=z-score.
     *
     * <p>Additionally, dummycoded columns that are neither recoded nor binned are added to the
     * recode list, and columns imputed by global mode that are not in the user's recode list
     * are emitted under {@code "mvrcd"}.
     *
     * @param fs            unused
     * @param inputPath     unused
     * @param smallestFile  unused
     * @param colNames      map from column name to 1-based column ID (used unless by-position)
     * @param prop          unused
     * @param specWithNames JSON text of the user specification
     * @return JSON text of the ID-based specification
     * @throws IllegalArgumentException if a referenced column name is unknown, the number of
     *                                  bins is not greater than 1, or transformations on a
     *                                  column conflict
     * @throws IOException              if an impute, bin or scale method is unknown or unsupported
     * @throws JSONException            if the specification cannot be parsed or built
     */
    private static String processSpecFile(FileSystem fs, String inputPath, String smallestFile, HashMap<String, Integer> colNames, CSVFileFormatProperties prop, String specWithNames) throws IllegalArgumentException, IOException, JSONException {
        JSONObject inputSpec = new JSONObject(specWithNames);
        boolean byPositions = inputSpec.containsKey("ids") && ((Boolean)inputSpec.get("ids")).booleanValue();

        // --- omit ---
        int[] omitList = DataTransform.readSpecIdList(inputSpec, "omit", byPositions, colNames);

        // --- impute ---
        int[] mvList = null;
        byte[] mvMethods = null;
        Object[] mvConstants = null;
        if (inputSpec.containsKey("impute")) {
            JSONArray entries = (JSONArray)inputSpec.get("impute");
            mvList = new int[entries.size()];
            mvMethods = new byte[entries.size()];
            mvConstants = new Object[entries.size()];
            for (int i = 0; i < entries.size(); ++i) {
                JSONObject entry = (JSONObject)entries.get(i);
                mvList[i] = DataTransform.readSpecEntryId(entry, byPositions, colNames);
                String method = UtilFunctions.unquote((String)entry.get("method"));
                if (method.equals("global_mean")) {
                    mvMethods[i] = 1;
                } else if (method.equals("global_mode")) {
                    mvMethods[i] = 2;
                } else if (method.equals("constant")) {
                    mvMethods[i] = 3;
                } else {
                    throw new IOException("Unknown missing value imputation method (" + method + ") in transformation specification: " + specWithNames);
                }
                mvConstants[i] = entry.containsKey("value") ? entry.get("value") : null;
            }
            // sort by column ID while keeping methods/constants aligned
            DataTransform.inplacePermute(mvList, mvMethods, mvConstants, DataTransform.sortOrderOf(mvList));
        }

        // --- recode ---
        int[] rcdList = DataTransform.readSpecIdList(inputSpec, "recode", byPositions, colNames);

        // --- bin ---
        int[] binList = null;
        byte[] binMethods = null;
        Object[] numBins = null;
        if (inputSpec.containsKey("bin")) {
            JSONArray entries = (JSONArray)inputSpec.get("bin");
            binList = new int[entries.size()];
            binMethods = new byte[entries.size()];
            numBins = new Object[entries.size()];
            for (int i = 0; i < entries.size(); ++i) {
                JSONObject entry = (JSONObject)entries.get(i);
                binList[i] = DataTransform.readSpecEntryId(entry, byPositions, colNames);
                String method = UtilFunctions.unquote((String)entry.get("method"));
                if (method.equals("equi-height")) {
                    throw new IOException("Equi-height binning method is not yet supported, in transformation specification: " + specWithNames);
                }
                if (!method.equals("equi-width")) {
                    throw new IOException("Unknown binning method (" + method + ") in transformation specification: " + specWithNames);
                }
                binMethods[i] = 1;
                numBins[i] = entry.get("numbins");
                if ((Integer)numBins[i] <= 1) {
                    // in by-position mode entries carry no "name", so report the column ID instead
                    String column = byPositions ? String.valueOf(binList[i]) : (String)entry.get("name");
                    throw new IllegalArgumentException("Invalid transformation on column \"" + column + "\". Number of bins must be greater than 1.");
                }
            }
            DataTransform.inplacePermute(binList, binMethods, numBins, DataTransform.sortOrderOf(binList));
        }

        // --- dummycode ---
        int[] dcdList = DataTransform.readSpecIdList(inputSpec, "dummycode", byPositions, colNames);

        // --- scale ---
        int[] scaleList = null;
        byte[] scaleMethods = null;
        if (inputSpec.containsKey("scale")) {
            JSONArray entries = (JSONArray)inputSpec.get("scale");
            scaleList = new int[entries.size()];
            scaleMethods = new byte[entries.size()];
            for (int i = 0; i < entries.size(); ++i) {
                JSONObject entry = (JSONObject)entries.get(i);
                scaleList[i] = DataTransform.readSpecEntryId(entry, byPositions, colNames);
                String method = UtilFunctions.unquote((String)entry.get("method"));
                if (method.equals("mean-subtraction")) {
                    scaleMethods[i] = 1;
                } else if (method.equals("z-score")) {
                    scaleMethods[i] = 2;
                } else {
                    throw new IOException("Unknown scaling method (" + method + ") in transformation specification: " + specWithNames);
                }
            }
            DataTransform.inplacePermute(scaleList, scaleMethods, null, DataTransform.sortOrderOf(scaleList));
        }

        // Columns imputed by global mode that the user did not ask to recode.
        // This is evaluated against the user's recode list, before dummycoded columns are merged in.
        ArrayList<Integer> modeImputed = new ArrayList<Integer>();
        if (mvList != null) {
            for (int i = 0; i < mvList.length; ++i) {
                if (mvMethods[i] == 2 && !DataTransform.inSortedIds(rcdList, mvList[i])) {
                    modeImputed.add(mvList[i]);
                }
            }
        }
        int[] mvrcdList = null;
        if (!modeImputed.isEmpty()) {
            mvrcdList = new int[modeImputed.size()];
            for (int i = 0; i < mvrcdList.length; ++i) {
                mvrcdList[i] = modeImputed.get(i);
            }
        }

        // --- consistency checks between transformations ---
        if (mvList != null) {
            for (int i = 0; i < mvList.length; ++i) {
                int colID = mvList[i];
                if (DataTransform.inSortedIds(omitList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be both omitted and imputed.");
                }
                if (mvMethods[i] != 1) {
                    continue;
                }
                // mean imputation implies a numeric column
                if (DataTransform.inSortedIds(rcdList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A numeric column can not be recoded.");
                }
                if (DataTransform.inSortedIds(dcdList, colID) && !DataTransform.inSortedIds(binList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A numeric column can not be dummycoded.");
                }
            }
        }
        if (scaleList != null) {
            for (int colID : scaleList) {
                if (DataTransform.inSortedIds(rcdList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be recoded and scaled.");
                }
                if (DataTransform.inSortedIds(binList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be binned and scaled.");
                }
                if (DataTransform.inSortedIds(dcdList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be dummycoded and scaled.");
                }
            }
        }
        if (rcdList != null) {
            for (int colID : rcdList) {
                if (DataTransform.inSortedIds(binList, colID)) {
                    throw new IllegalArgumentException("Invalid transformations on column ID " + colID + ". A column can not be recoded and binned.");
                }
            }
        }

        // Dummycoded columns that are neither recoded nor binned are added to the recode list.
        ArrayList<Integer> addToRcd = new ArrayList<Integer>();
        if (dcdList != null) {
            for (int colID : dcdList) {
                if (!DataTransform.inSortedIds(rcdList, colID) && !DataTransform.inSortedIds(binList, colID)) {
                    addToRcd.add(colID);
                }
            }
        }
        if (!addToRcd.isEmpty()) {
            int oldLen = rcdList != null ? rcdList.length : 0;
            int[] merged = rcdList != null ? Arrays.copyOf(rcdList, oldLen + addToRcd.size()) : new int[addToRcd.size()];
            for (int i = 0; i < addToRcd.size(); ++i) {
                merged[oldLen + i] = addToRcd.get(i);
            }
            Arrays.sort(merged);
            rcdList = merged;
        }

        // --- assemble the ID-based specification ---
        JSONObject outputSpec = new JSONObject();
        if (omitList != null) {
            JSONObject omitSpec = new JSONObject();
            omitSpec.put("attributes", DataTransform.toJSONArray(omitList));
            outputSpec.put("omit", omitSpec);
        }
        if (mvList != null) {
            JSONObject mvSpec = new JSONObject();
            mvSpec.put("attributes", DataTransform.toJSONArray(mvList));
            mvSpec.put("methods", DataTransform.toJSONArray(mvMethods));
            mvSpec.put("constants", DataTransform.toJSONArray(mvConstants));
            outputSpec.put("impute", mvSpec);
        }
        if (rcdList != null) {
            JSONObject rcdSpec = new JSONObject();
            rcdSpec.put("attributes", DataTransform.toJSONArray(rcdList));
            outputSpec.put("recode", rcdSpec);
        }
        if (binList != null) {
            JSONObject binSpec = new JSONObject();
            binSpec.put("attributes", DataTransform.toJSONArray(binList));
            binSpec.put("methods", DataTransform.toJSONArray(binMethods));
            binSpec.put("numbins", DataTransform.toJSONArray(numBins));
            outputSpec.put("bin", binSpec);
        }
        if (dcdList != null) {
            JSONObject dcdSpec = new JSONObject();
            dcdSpec.put("attributes", DataTransform.toJSONArray(dcdList));
            outputSpec.put("dummycode", dcdSpec);
        }
        if (scaleList != null) {
            JSONObject scaleSpec = new JSONObject();
            scaleSpec.put("attributes", DataTransform.toJSONArray(scaleList));
            scaleSpec.put("methods", DataTransform.toJSONArray(scaleMethods));
            outputSpec.put("scale", scaleSpec);
        }
        if (mvrcdList != null) {
            JSONObject mvrcd = new JSONObject();
            mvrcd.put("attributes", DataTransform.toJSONArray(mvrcdList));
            outputSpec.put("mvrcd", mvrcd);
        }
        return outputSpec.toString();
    }

    /** Reads a plain list section (array of names or positions) as a sorted ID array; null if the section is absent. */
    private static int[] readSpecIdList(JSONObject spec, String key, boolean byPositions, HashMap<String, Integer> colNames) throws JSONException {
        if (!spec.containsKey(key)) {
            return null;
        }
        JSONArray attrs = (JSONArray)spec.get(key);
        int[] ids = new int[attrs.size()];
        for (int i = 0; i < ids.length; ++i) {
            ids[i] = byPositions ? UtilFunctions.toInt(attrs.get(i)) : DataTransform.lookupSpecColumn(colNames, (String)attrs.get(i));
        }
        Arrays.sort(ids);
        return ids;
    }

    /** Resolves the column ID of one object-style spec entry from its "id" (by position) or "name" key. */
    private static int readSpecEntryId(JSONObject entry, boolean byPositions, HashMap<String, Integer> colNames) throws JSONException {
        if (byPositions) {
            return UtilFunctions.toInt(entry.get("id"));
        }
        return DataTransform.lookupSpecColumn(colNames, (String)entry.get("name"));
    }

    /** Maps an (optionally quoted) column name to its ID, rejecting unknown names with a descriptive error. */
    private static int lookupSpecColumn(HashMap<String, Integer> colNames, String rawName) {
        String name = UtilFunctions.unquote(rawName);
        Integer id = colNames.get(name);
        if (id == null) {
            throw new IllegalArgumentException("Column \"" + name + "\" referenced in the transformation specification does not exist in the input data.");
        }
        return id;
    }

    /** Returns the positions of {@code keys} ordered by ascending key value (ties keep their original order). */
    private static Integer[] sortOrderOf(final int[] keys) {
        Integer[] order = new Integer[keys.length];
        for (int i = 0; i < order.length; ++i) {
            order[i] = i;
        }
        Arrays.sort(order, new Comparator<Integer>(){

            @Override
            public int compare(Integer o1, Integer o2) {
                return Integer.compare(keys[o1], keys[o2]);
            }
        });
        return order;
    }

    /** True if {@code sortedIds} is non-null and contains {@code id}; the array must be sorted ascending. */
    private static boolean inSortedIds(int[] sortedIds, int id) {
        return sortedIds != null && Arrays.binarySearch(sortedIds, id) >= 0;
    }

    /**
     * Copies an {@code int} array into a new {@link JSONArray}, preserving element order.
     *
     * @param list values to copy
     * @return JSON array holding the boxed values
     */
    private static JSONArray toJSONArray(int[] list) {
        JSONArray json = new JSONArray(list.length);
        for (int value : list) {
            json.add((Object)Integer.valueOf(value));
        }
        return json;
    }

    /**
     * Copies a {@code byte} array into a new {@link JSONArray}, preserving element order.
     *
     * @param list values to copy
     * @return JSON array holding the boxed values
     */
    private static JSONArray toJSONArray(byte[] list) {
        JSONArray json = new JSONArray(list.length);
        for (byte value : list) {
            json.add((Object)Byte.valueOf(value));
        }
        return json;
    }

    /**
     * Wraps an object array in a new {@link JSONArray} via the array-accepting constructor.
     *
     * @param list elements to wrap
     * @return JSON array built from {@code list}
     * @throws JSONException propagated from the {@link JSONArray} constructor
     */
    private static JSONArray toJSONArray(Object[] list) throws JSONException {
        JSONArray json = new JSONArray(list);
        return json;
    }

    /**
     * Moves the metadata files from a temporary directory into the transformation
     * metadata directory.
     *
     * <p>{@code column.names}, {@code dummycoded.column.names} and {@code coltypes.csv} are
     * always renamed. {@code Dummycode/dummyCodeMaps.csv} is renamed only if it exists under
     * {@code tmpPath}; the target {@code Dummycode} directory is created first when missing.
     *
     * @param fs        file system used for the existence checks and directory creation
     * @param tmpPath   source directory
     * @param txMtdPath destination directory
     */
    private static void moveFilesFromTmp(FileSystem fs, String tmpPath, String txMtdPath) throws IllegalArgumentException, IOException {
        String[] alwaysMoved = new String[]{"column.names", "dummycoded.column.names", "coltypes.csv"};
        for (String fileName : alwaysMoved) {
            MapReduceTool.renameFileOnHDFS(tmpPath + "/" + fileName, txMtdPath + "/" + fileName);
        }

        String tmpMaps = tmpPath + "/Dummycode/" + "dummyCodeMaps.csv";
        if (!fs.exists(new Path(tmpMaps))) {
            return;
        }
        String targetDir = txMtdPath + "/Dummycode/";
        if (!fs.exists(new Path(targetDir))) {
            fs.mkdirs(new Path(targetDir));
        }
        MapReduceTool.renameFileOnHDFS(tmpMaps, targetDir + "dummyCodeMaps.csv");
    }

    /**
     * Computes the number of columns of the transformed output.
     *
     * <p>The count starts at the number of fields in {@code header}. If the spec stored at
     * {@code tfMtdPath/spec.json} has a {@code dummycode} section, each listed column adds
     * {@code k - 1} columns, where {@code k} is read from the column's metadata: the fifth
     * comma-separated field of the first line of {@code Bin/<name>.bin} if that file is
     * valid, otherwise the first line of {@code Recode/<name>.ndistinct}.
     *
     * @param fs        file system holding the transformation metadata
     * @param header    header line of the data
     * @param delim     literal column delimiter of {@code header}
     * @param tfMtdPath transformation metadata directory
     * @return number of columns after dummycoding
     * @throws DMLRuntimeException if a dummycoded column ID is outside the header, or its
     *                             metadata file is missing, empty or malformed
     * @throws IOException         if a metadata file cannot be read
     * @throws JSONException       if the spec cannot be parsed
     */
    private static int getNumColumnsTf(FileSystem fs, String header, String delim, String tfMtdPath) throws IllegalArgumentException, IOException, DMLRuntimeException, JSONException {
        // limit -1 keeps trailing empty fields so they still count as columns
        String[] columnNames = Pattern.compile(Pattern.quote(delim)).split(header, -1);
        int ret = columnNames.length;

        JSONObject spec;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(tfMtdPath + "/spec.json"))))) {
            spec = JSONHelper.parse(br);
        }
        if (!spec.containsKey("dummycode")) {
            return ret;
        }

        JSONArray dcdList = (JSONArray)((JSONObject)spec.get("dummycode")).get("attributes");
        for (Object o : dcdList) {
            int id = UtilFunctions.toInt(o);
            if (id < 1 || id > columnNames.length) {
                throw new DMLRuntimeException("Dummycoded column id " + id + " is out of range; the header has " + columnNames.length + " columns.");
            }
            String colName = UtilFunctions.unquote(columnNames[id - 1]);
            Path binpath = new Path(tfMtdPath + "/Bin/" + colName + ".bin");
            Path rcdpath = new Path(tfMtdPath + "/Recode/" + colName + ".ndistinct");

            if (TfUtils.checkValidInputFile(fs, binpath, false)) {
                // binning metadata takes precedence; the bin count is the fifth field
                String[] fields = DataTransform.readFirstMetadataLine(fs, binpath).split(",");
                if (fields.length < 5) {
                    throw new DMLRuntimeException("Malformed binning metadata in " + binpath + ": expected at least 5 comma-separated fields.");
                }
                ret += UtilFunctions.parseToInt(fields[4]) - 1;
            } else if (TfUtils.checkValidInputFile(fs, rcdpath, false)) {
                ret += UtilFunctions.parseToInt(DataTransform.readFirstMetadataLine(fs, rcdpath)) - 1;
            } else {
                throw new DMLRuntimeException("Relevant transformation metadata for column (id=" + id + ", name=" + columnNames[id - 1] + ") is not found.");
            }
        }
        return ret;
    }

    /** Returns the first line of a metadata file, failing with a descriptive error if the file is empty. */
    private static String readFirstMetadataLine(FileSystem fs, Path file) throws IOException, DMLRuntimeException {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(file)))) {
            String line = br.readLine();
            if (line == null) {
                throw new DMLRuntimeException("Transformation metadata file " + file + " is empty.");
            }
            return line;
        }
    }

    /**
     * Runs a data transformation over a CSV input as a sequence of MapReduce jobs.
     *
     * <p>The first part of {@code shuffleInst} (split on {@code '\u2021'}) is parsed into the
     * transform operands; the second part is handed to the binary-block apply job. Outputs
     * are partitioned into CSV outputs and all others (treated as binary block); only the
     * first output of each kind is used. When the operands do not request "apply", the
     * metadata is generated first and then applied; otherwise existing metadata is copied
     * from {@code applyTxPath} and applied. Finally the metadata files are moved from the
     * temporary directory and a {@code .mtd} file is written for the returned output.
     *
     * @param jobinst       not referenced in this method
     * @param inputs        inputs; only {@code inputs[0]} is used
     * @param shuffleInst   instruction string holding the transform and reblock instructions
     * @param otherInst     passed through to the binary-block apply job
     * @param resultIndices not referenced in this method
     * @param outputs       output matrix objects
     * @param numReducers   not referenced in this method
     * @param replication   passed through to the launched jobs
     * @return the CSV job result if a CSV output exists, else the binary-block job result,
     *         else {@code null}
     * @throws Exception on any failure, including a {@link DMLRuntimeException} when a job
     *                   reports zero rows
     */
    public static JobReturn mrDataTransform(MRJobInstruction jobinst, MatrixObject[] inputs, String shuffleInst, String otherInst, byte[] resultIndices, MatrixObject[] outputs, int numReducers, int replication) throws Exception {
        String specWithIDs;
        // insts[0]: transform instruction, insts[1]: reblock instruction (used by the binary-block path)
        String[] insts = shuffleInst.split("\u2021");
        TransformOperands oprnds = new TransformOperands(insts[0], (CacheableData)inputs[0]);
        JobConf job = new JobConf((Configuration)ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get((Configuration)job);
        // the header (real or generated) is derived from the smallest input part file
        String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
        String headerLine = DataTransform.readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
        HashMap<String, Integer> colNamesToIds = DataTransform.processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
        String outHeader = DataTransform.getOutputHeader(fs, headerLine, oprnds);
        // NOTE(review): this is the number of distinct column names; duplicate names in the header would lower it
        int numColumns = colNamesToIds.size();
        int numColumnsTf = 0;
        long numRowsTf = 0L;
        // partition output indices: CSV-formatted outputs vs. everything else
        ArrayList<Integer> csvoutputs = new ArrayList<Integer>();
        ArrayList<Integer> bboutputs = new ArrayList<Integer>();
        for (int i = 0; i < outputs.length; ++i) {
            if (outputs[i].getFileFormatProperties() != null && outputs[i].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV) {
                csvoutputs.add(i);
                continue;
            }
            bboutputs.add(i);
        }
        boolean isCSV = csvoutputs.size() > 0;
        boolean isBB = bboutputs.size() > 0;
        String tmpPath = MRJobConfiguration.constructTempOutputFilename();
        DataTransform.checkIfOutputOverlapsWithTxMtd(outputs, oprnds, isCSV, isBB, csvoutputs, bboutputs, fs);
        JobReturn retCSV = null;
        JobReturn retBB = null;
        if (!oprnds.isApply) {
            // generate metadata from the user spec, then apply it
            specWithIDs = DataTransform.processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
            colNamesToIds = null;
            String partOffsetsFile = MRJobConfiguration.constructTempOutputFilename();
            numRowsTf = GenTfMtdMR.runJob(oprnds.inputPath, oprnds.txMtdPath, specWithIDs, smallestFile, partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
            if (numRowsTf == 0L) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            // persist the ID-based spec; getNumColumnsTf reads it back from this location
            MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/spec.json");
            numColumnsTf = DataTransform.getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
            if (isCSV) {
                retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), partOffsetsFile, oprnds.inputCSVProperties, numColumns, replication, outHeader);
            }
            if (isBB) {
                // row IDs are assigned using a dummy reblock instruction built with the configured default block size
                DMLConfig conf = ConfigurationManager.getDMLConfig();
                int blockSize = conf.getIntValue("defaultblocksize");
                CSVReblockInstruction rblk = DataTransform.prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
                CSVReblockMR.AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[]{oprnds.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{blockSize}, new int[]{blockSize}, rblk.toString(), replication, new String[]{smallestFile}, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
                if (ret1.rlens[0] == 0L) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numRowsTf, numColumns, numColumnsTf, replication, outHeader);
            }
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsFile), job);
        } else {
            // apply existing metadata: replace txMtdPath with a copy of applyTxPath
            colNamesToIds = null;
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
            MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
            // an explicitly supplied spec wins over the spec.json stored with the metadata
            specWithIDs = oprnds.spec != null ? oprnds.spec : MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/spec.json");
            numColumnsTf = DataTransform.getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
            if (isCSV) {
                DMLConfig conf = ConfigurationManager.getDMLConfig();
                int blockSize = conf.getIntValue("defaultblocksize");
                CSVReblockInstruction rblk = DataTransform.prepDummyReblockInstruction(oprnds.inputCSVProperties, blockSize);
                CSVReblockMR.AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[]{oprnds.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{blockSize}, new int[]{blockSize}, rblk.toString(), replication, new String[]{smallestFile}, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
                // in apply mode the row count comes from the row-ID assignment job
                numRowsTf = ret1.rlens[0];
                if (ret1.rlens[0] == 0L) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                // NOTE(review): this call reads metadata from applyTxPath while the BB path below uses txMtdPath — confirm intended
                retCSV = ApplyTfCSVMR.runJob(oprnds.inputPath, specWithIDs, oprnds.applyTxPath, tmpPath, outputs[csvoutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, numColumns, replication, outHeader);
            }
            if (isBB) {
                // here the actual reblock instruction from insts[1] is used (cloned with argument 0)
                CSVReblockInstruction rblk = (CSVReblockInstruction)InstructionParser.parseSingleInstruction(insts[1]);
                CSVReblockInstruction newrblk = (CSVReblockInstruction)rblk.clone((byte)0);
                CSVReblockMR.AssignRowIDMRReturn ret1 = CSVReblockMR.runAssignRowIDMRJob(new String[]{oprnds.inputPath}, new InputInfo[]{InputInfo.CSVInputInfo}, new int[]{newrblk.brlen}, new int[]{newrblk.bclen}, newrblk.toString(), replication, new String[]{smallestFile}, true, oprnds.inputCSVProperties.getNAStrings(), specWithIDs);
                numRowsTf = ret1.rlens[0];
                if (ret1.rlens[0] == 0L) {
                    throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
                }
                retBB = ApplyTfBBMR.runJob(oprnds.inputPath, insts[1], otherInst, specWithIDs, oprnds.txMtdPath, tmpPath, outputs[bboutputs.get(0)].getFileName(), ret1.counterFile.toString(), oprnds.inputCSVProperties, ret1.rlens[0], ret1.clens[0], numColumnsTf, replication, outHeader);
            }
        }
        DataTransform.moveFilesFromTmp(fs, tmpPath, oprnds.txMtdPath);
        // The CSV result takes precedence: if both jobs ran, only the CSV .mtd file is written
        // and the binary-block result is not returned.
        if (retCSV != null) {
            retCSV.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
            CSVFileFormatProperties prop = new CSVFileFormatProperties(false, oprnds.inputCSVProperties.getDelim(), false, Double.NaN, null);
            MapReduceTool.writeMetaDataFile(outputs[csvoutputs.get(0)].getFileName() + ".mtd", Expression.ValueType.DOUBLE, retCSV.getMatrixCharacteristics(0), OutputInfo.CSVOutputInfo, prop);
            return retCSV;
        }
        if (retBB != null) {
            retBB.getMatrixCharacteristics(0).setDimension(numRowsTf, numColumnsTf);
            MapReduceTool.writeMetaDataFile(outputs[bboutputs.get(0)].getFileName() + ".mtd", Expression.ValueType.DOUBLE, retBB.getMatrixCharacteristics(0), OutputInfo.BinaryBlockOutputInfo);
            return retBB;
        }
        return null;
    }

    /**
     * Assembles the textual form of a CSV reblock instruction and parses it.
     *
     * The instruction string consists of the execution type, the opcode
     * {@code csvrblk}, two matrix/double operands labelled {@code 0} and
     * {@code 1}, the block size (twice), and the header, delimiter, fill and
     * fill-value settings taken from {@code prop}.
     *
     * @param prop CSV properties supplying header/delimiter/fill settings
     * @param blockSize value written for both block-size fields
     * @return the instruction produced by {@code CSVReblockInstruction.parseInstruction}
     */
    private static CSVReblockInstruction prepDummyReblockInstruction(CSVFileFormatProperties prop, int blockSize) {
        // Separator between instruction fields and separator inside one operand.
        final String fieldSep = "\u00b0";
        final String typeSep = "\u00b7";

        StringBuilder text = new StringBuilder();
        text.append((Object)LopProperties.ExecType.MR).append(fieldSep);
        text.append("csvrblk").append(fieldSep);
        text.append("0").append(typeSep)
            .append((Object)Expression.DataType.MATRIX).append(typeSep)
            .append((Object)Expression.ValueType.DOUBLE).append(fieldSep);
        text.append("1").append(typeSep)
            .append((Object)Expression.DataType.MATRIX).append(typeSep)
            .append((Object)Expression.ValueType.DOUBLE).append(fieldSep);
        text.append(blockSize).append(fieldSep);
        text.append(blockSize).append(fieldSep);
        text.append(prop.hasHeader()).append(fieldSep);
        text.append(prop.getDelim()).append(fieldSep);
        text.append(prop.isFill()).append(fieldSep);
        text.append(prop.getFillValue());
        return CSVReblockInstruction.parseInstruction(text.toString());
    }

    /**
     * Determines the header line (column names) to use for the transformed output.
     *
     * <ul>
     *   <li>In apply mode ({@code oprnds.isApply}), the first line of
     *       {@code <applyTxPath>/column.names} is returned.</li>
     *   <li>Otherwise, if no output-names file was given, {@code headerLine}
     *       is returned unchanged.</li>
     *   <li>Otherwise the first line of {@code oprnds.outNamesFile} is returned.</li>
     * </ul>
     *
     * The apply-mode case is exclusive: its result is not replaced by the
     * input header or the output-names file.
     *
     * @param fs file system used to open the header files
     * @param headerLine header line of the input data
     * @param oprnds transform operands (apply path, output-names file)
     * @return the header line, or {@code null} if the file that was read is empty
     * @throws IOException if a header file cannot be opened or read
     */
    private static String getOutputHeader(FileSystem fs, String headerLine, TransformOperands oprnds) throws IOException {
        if (oprnds.isApply) {
            // Reuse the column names stored with the existing transformation metadata.
            try (BufferedReader br = new BufferedReader(new InputStreamReader((InputStream)fs.open(new Path(oprnds.applyTxPath + "/" + "column.names"))))) {
                return br.readLine();
            }
        }
        if (oprnds.outNamesFile == null) {
            return headerLine;
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader((InputStream)fs.open(new Path(oprnds.outNamesFile))))) {
            return br.readLine();
        }
    }

    /**
     * Runs the single-node transform for a parameterized builtin instruction.
     *
     * Builds the operands from the instruction's parameter map and the first
     * input, then delegates to the operand-based overload.
     *
     * @param inst instruction carrying the transform parameters
     * @param inputs input data objects; only {@code inputs[0]} is used to build the operands
     * @param outputs output matrix objects
     * @return the result of the delegated transform
     */
    public static JobReturn cpDataTransform(ParameterizedBuiltinCPInstruction inst, CacheableData<?>[] inputs, MatrixObject[] outputs) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        return DataTransform.cpDataTransform(new TransformOperands(inst.getParameterMap(), inputs[0]), inputs, outputs);
    }

    /**
     * Runs the single-node transform for an instruction given as a string.
     *
     * The string is split on {@code \u2021}; only the first piece is used to
     * build the operands before delegating to the operand-based overload.
     *
     * @param inst instruction string, possibly several instructions joined by {@code \u2021}
     * @param inputs input data objects; only {@code inputs[0]} is used to build the operands
     * @param outputs output matrix objects
     * @return the result of the delegated transform
     */
    public static JobReturn cpDataTransform(String inst, CacheableData<?>[] inputs, MatrixObject[] outputs) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        String transformInst = inst.split("\u2021")[0];
        TransformOperands operands = new TransformOperands(transformInst, inputs[0]);
        return DataTransform.cpDataTransform(operands, inputs, outputs);
    }

    /**
     * Runs the single-node transform for already-parsed operands.
     *
     * Reads the header from the smallest input file, resolves column names,
     * classifies the outputs as CSV or non-CSV, checks that the output does
     * not overlap with the transform metadata path, prepares the
     * specification (built from the input, or taken from the apply path in
     * apply mode) and finally calls {@code performTransform} on {@code outputs[0]}.
     *
     * @param oprnds parsed transform operands
     * @param inputs input data objects (not read directly by this method)
     * @param outputs output matrix objects
     * @return the result of {@code performTransform}
     */
    public static JobReturn cpDataTransform(TransformOperands oprnds, CacheableData<?>[] inputs, MatrixObject[] outputs) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        JobConf job = new JobConf((Configuration)ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get((Configuration)job);

        // Header and column-name resolution are based on the smallest input file.
        String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
        String headerLine = DataTransform.readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
        HashMap<String, Integer> colNamesToIds = DataTransform.processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
        String outHeader = DataTransform.getOutputHeader(fs, headerLine, oprnds);

        // Split output indices by format: CSV versus everything else.
        ArrayList<Integer> csvoutputs = new ArrayList<Integer>();
        ArrayList<Integer> bboutputs = new ArrayList<Integer>();
        for (int idx = 0; idx < outputs.length; ++idx) {
            boolean csvFormat = outputs[idx].getFileFormatProperties() != null
                && outputs[idx].getFileFormatProperties().getFileFormat() == FileFormatProperties.FileFormat.CSV;
            (csvFormat ? csvoutputs : bboutputs).add(idx);
        }
        boolean isCSV = !csvoutputs.isEmpty();
        boolean isBB = !bboutputs.isEmpty();
        DataTransform.checkIfOutputOverlapsWithTxMtd(outputs, oprnds, isCSV, isBB, csvoutputs, bboutputs, fs);

        String specWithIDs;
        if (oprnds.isApply) {
            // Replace the target metadata directory with a copy of the given one.
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
            MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
            if (oprnds.spec != null) {
                specWithIDs = oprnds.spec;
            } else {
                specWithIDs = MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/spec.json");
            }
        } else {
            specWithIDs = DataTransform.processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
            MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/spec.json");
        }
        return DataTransform.performTransform(job, fs, oprnds.inputPath, colNamesToIds.size(), oprnds.inputCSVProperties, specWithIDs, oprnds.txMtdPath, oprnds.isApply, outputs[0], outHeader, isBB, isCSV);
    }

    /**
     * Lists the files that make up an input.
     *
     * If {@code input} is a directory, its entries accepted by
     * {@code CSVReblockMR.hiddenFileFilter} are returned in sorted order;
     * otherwise the result contains just the path itself.
     *
     * @param input path of the input file or directory
     * @param fs file system used to inspect the path
     * @return the list of input file paths
     */
    private static ArrayList<Path> collectInputFiles(String input, FileSystem fs) throws FileNotFoundException, IOException {
        Path root = new Path(input);
        ArrayList<Path> found = new ArrayList<Path>();
        if (!fs.isDirectory(root)) {
            found.add(root);
            return found;
        }
        for (FileStatus entry : fs.listStatus(root, CSVReblockMR.hiddenFileFilter)) {
            found.add(entry.getPath());
        }
        Collections.sort(found);
        return found;
    }

    /**
     * Counts the data rows in the given files.
     *
     * The first line of the first file is skipped when {@code prop.hasHeader()}
     * is true. If the omit agent is applicable, every line is split on the
     * (literal) delimiter and only rows the agent does not omit are counted
     * as retained; otherwise all rows are retained.
     *
     * @param files input files, read in list order
     * @param prop CSV properties (header flag, delimiter)
     * @param fs file system used to open the files
     * @param agents transform agents; supplies the omit agent
     * @return a two-element array: {@code [0]} total rows, {@code [1]} retained rows
     * @throws IOException if a file cannot be opened or read
     */
    private static int[] countNumRows(ArrayList<Path> files, CSVFileFormatProperties prop, FileSystem fs, TfUtils agents) throws IOException {
        OmitAgent omitAgent = agents.getOmitAgent();
        boolean filterRows = omitAgent.isApplicable();
        // The delimiter pattern is only needed when rows must be inspected.
        Pattern splitter = filterRows ? Pattern.compile(Pattern.quote(prop.getDelim())) : null;

        int total = 0;
        int retained = 0;
        for (int fileNo = 0; fileNo < files.size(); ++fileNo) {
            try (BufferedReader reader = new BufferedReader(new InputStreamReader((InputStream)fs.open(files.get(fileNo))))) {
                if (fileNo == 0 && prop.hasHeader()) {
                    reader.readLine();
                }
                String row;
                while ((row = reader.readLine()) != null) {
                    ++total;
                    if (filterRows && omitAgent.omit(splitter.split(row, -1), agents)) {
                        continue;
                    }
                    ++retained;
                }
            }
        }
        return new int[]{total, retained};
    }

    /**
     * Performs the transform in-process by streaming over the input file(s).
     *
     * When {@code isApply} is false, a first pass feeds every data line to the
     * agents, the resulting transformation metadata is written to
     * {@code tfMtdPath} and then loaded back. When {@code isApply} is true,
     * the rows are only counted and existing metadata is loaded from
     * {@code tfMtdPath}. A final pass applies the transformation to every row
     * that is not omitted; the row is written as a text line when
     * {@code isCSV} is set and/or appended to an in-memory matrix block when
     * {@code isBB} is set.
     *
     * Decompiler note (CFR): "Removed try catching itself - possible behaviour change."
     *
     * @param job job configuration passed to the agents' metadata loaders
     * @param fs file system used for all reads and writes
     * @param inputPath input file or directory
     * @param ncols number of input columns
     * @param prop CSV properties of the input (header, delimiter, NA strings)
     * @param specWithIDs transformation specification as a JSON string
     * @param tfMtdPath directory of the transformation metadata
     * @param isApply true to apply existing metadata instead of building it
     * @param result output matrix object; its file is deleted and re-created
     * @param headerLine header line handed to the agents and header files
     * @param isBB true to fill a matrix block and export it through {@code result}
     * @param isCSV true to write transformed rows as text lines
     * @return a {@code JobReturn} with the dimensions of the transformed data
     * @throws DMLRuntimeException if no valid rows are found
     */
    private static JobReturn performTransform(JobConf job, FileSystem fs, String inputPath, int ncols, CSVFileFormatProperties prop, String specWithIDs, String tfMtdPath, boolean isApply, MatrixObject result, String headerLine, boolean isBB, boolean isCSV) throws IOException, DMLRuntimeException, IllegalArgumentException, JSONException {
        String[] na = TfUtils.parseNAStrings(prop.getNAStrings());
        JSONObject spec = new JSONObject(specWithIDs);
        TfUtils agents = new TfUtils(headerLine, prop.hasHeader(), prop.getDelim(), na, spec, ncols, tfMtdPath, null, null);
        MVImputeAgent _mia = agents.getMVImputeAgent();
        RecodeAgent _ra = agents.getRecodeAgent();
        BinAgent _ba = agents.getBinAgent();
        DummycodeAgent _da = agents.getDummycodeAgent();
        ArrayList<Path> files = DataTransform.collectInputFiles(inputPath, fs);
        String line = null;
        String[] words = null;
        int numColumnsTf = 0;
        if (!isApply) {
            // Build mode, pass 1: hand every data line to the agents.
            for (int fileNo = 0; fileNo < files.size(); ++fileNo) {
                try (BufferedReader br = new BufferedReader(new InputStreamReader((InputStream)fs.open(files.get(fileNo))));){
                    // Only the first file is treated as carrying the header line.
                    if (fileNo == 0 && prop.hasHeader()) {
                        br.readLine();
                    }
                    line = null;
                    while ((line = br.readLine()) != null) {
                        agents.prepareTfMtd(line);
                    }
                    continue;
                }
            }
            if (agents.getValid() == 0L) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            // Write the metadata of the impute, bin and recode agents.
            _mia.outputTransformationMetadata(tfMtdPath, fs, agents);
            _ba.outputTransformationMetadata(tfMtdPath, fs, agents);
            _ra.outputTransformationMetadata(tfMtdPath, fs, agents);
            // Load metadata back for the apply pass. The recode agent is not
            // reloaded here; its maps are passed to the dummycode agent directly.
            Path tmp = new Path(tfMtdPath);
            _mia.loadTxMtd(job, fs, tmp, agents);
            _ba.loadTxMtd(job, fs, tmp, agents);
            _da.setRecodeMapsCP(_ra.getCPRecodeMaps());
            _da.setNumBins(_ba.getColList(), _ba.getNumBins());
            _da.loadTxMtd(job, fs, tmp, agents);
        } else {
            // Apply mode: count total and retained rows, then load existing metadata.
            int[] rows = DataTransform.countNumRows(files, prop, fs, agents);
            agents.setTotal(rows[0]);
            agents.setValid(rows[1]);
            if (agents.getValid() == 0L) {
                throw new DMLRuntimeException(ERROR_MSG_ZERO_ROWS);
            }
            Path tmp = new Path(tfMtdPath);
            _mia.loadTxMtd(job, fs, tmp, agents);
            _ra.loadTxMtd(job, fs, tmp, agents);
            _ba.loadTxMtd(job, fs, tmp, agents);
            _da.setRecodeMaps(_ra.getRecodeMaps());
            _da.setNumBins(_ba.getColList(), _ba.getNumBins());
            _da.loadTxMtd(job, fs, tmp, agents);
        }
        numColumnsTf = DataTransform.getNumColumnsTf(fs, headerLine, prop.getDelim(), tfMtdPath);
        // The output file is removed and re-created regardless of isCSV.
        MapReduceTool.deleteFileIfExistOnHDFS(result.getFileName());
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter((OutputStream)fs.create(new Path(result.getFileName()), true)));
        StringBuilder sb = new StringBuilder();
        try {
            MatrixBlock mb = null;
            if (isBB) {
                // NOTE(review): the cast binds before the multiplication, so this is
                // int arithmetic and may overflow for large inputs - confirm.
                int estNNZ = (int)agents.getValid() * ncols;
                mb = new MatrixBlock((int)agents.getValid(), numColumnsTf, estNNZ);
                if (mb.isInSparseFormat()) {
                    mb.allocateSparseRowsBlock();
                } else {
                    mb.allocateDenseBlock();
                }
            }
            // Index of the next output row; advanced only for rows that are kept.
            int rowID = 0;
            for (int fileNo = 0; fileNo < files.size(); ++fileNo) {
                try (BufferedReader br = new BufferedReader(new InputStreamReader((InputStream)fs.open(files.get(fileNo))));){
                    if (fileNo == 0) {
                        if (prop.hasHeader()) {
                            br.readLine();
                        }
                        // Done once, on the first file: derive the dummycoded header,
                        // recompute numColumnsTf and write the header files.
                        String dcdHeader = _da.constructDummycodedHeader(headerLine, agents.getDelim());
                        numColumnsTf = _da.genDcdMapsAndColTypes(fs, tfMtdPath, ncols, agents);
                        DataTransform.generateHeaderFiles(fs, tfMtdPath, headerLine, dcdHeader);
                    }
                    line = null;
                    while ((line = br.readLine()) != null) {
                        words = agents.getWords(line);
                        if (agents.omit(words)) continue;
                        words = agents.apply(words);
                        if (isCSV) {
                            out.write(agents.checkAndPrepOutputString(words, sb));
                            out.write("\n");
                        }
                        if (isBB) {
                            agents.check(words);
                            for (int c = 0; c < words.length; ++c) {
                                // Null or empty cells are not appended to the block.
                                if (words[c] == null || words[c].isEmpty()) continue;
                                mb.appendValue(rowID, c, UtilFunctions.parseToDouble(words[c]));
                            }
                        }
                        ++rowID;
                    }
                    continue;
                }
            }
            if (mb != null) {
                // Finalize the block and hand it to the output object for export.
                mb.recomputeNonZeros();
                mb.examSparsity();
                result.acquireModify(mb);
                result.release();
                result.exportData();
            }
        }
        finally {
            IOUtilFunctions.closeSilently(out);
        }
        MatrixCharacteristics mc = new MatrixCharacteristics(agents.getValid(), numColumnsTf, (int)result.getNumRowsPerBlock(), (int)result.getNumColumnsPerBlock());
        JobReturn ret = new JobReturn(new MatrixCharacteristics[]{mc}, true);
        return ret;
    }

    /**
     * Writes the two header files of a transformation metadata directory.
     *
     * {@code origHeader} is written to {@code <txMtdDir>/column.names} and
     * {@code newHeader} to {@code <txMtdDir>/dummycoded.column.names}, each
     * followed by a newline. Existing files are overwritten. Both writers are
     * closed even if a write fails.
     *
     * @param fs file system on which the files are created
     * @param txMtdDir transformation metadata directory
     * @param origHeader header line of the original columns
     * @param newHeader header line after dummycoding
     * @throws IOException if a file cannot be created or written
     */
    public static void generateHeaderFiles(FileSystem fs, String txMtdDir, String origHeader, String newHeader) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter((OutputStream)fs.create(new Path(txMtdDir + "/" + "column.names"), true)))) {
            writer.write(origHeader + "\n");
        }
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter((OutputStream)fs.create(new Path(txMtdDir + "/" + "dummycoded.column.names"), true)))) {
            writer.write(newHeader + "\n");
        }
    }

    /**
     * Checks the first relevant output against the transform metadata path.
     *
     * The first CSV output is checked when {@code isCSV} is true; otherwise
     * the first non-CSV output is checked when {@code isBB} is true. If
     * neither flag is set, nothing is checked.
     *
     * @param outputs all output matrix objects
     * @param oprnds transform operands supplying the metadata path
     * @param isCSV whether there is at least one CSV output
     * @param isBB whether there is at least one non-CSV output
     * @param csvoutputs indices of CSV outputs in {@code outputs}
     * @param bboutputs indices of non-CSV outputs in {@code outputs}
     * @param fs file system used to qualify the paths
     * @throws DMLRuntimeException if the paths overlap
     */
    private static void checkIfOutputOverlapsWithTxMtd(MatrixObject[] outputs, TransformOperands oprnds, boolean isCSV, boolean isBB, ArrayList<Integer> csvoutputs, ArrayList<Integer> bboutputs, FileSystem fs) throws DMLRuntimeException {
        if (!isCSV && !isBB) {
            return;
        }
        // CSV takes precedence when both kinds of output are present.
        ArrayList<Integer> candidates = isCSV ? csvoutputs : bboutputs;
        DataTransform.checkIfOutputOverlapsWithTxMtd(oprnds.txMtdPath, outputs[candidates.get(0)].getFileName(), fs);
    }

    /**
     * Rejects an output path that overlaps with the transform metadata path.
     *
     * Both paths are qualified against {@code fs} and compared as strings.
     * If they share the same parent, they overlap only when they are equal;
     * otherwise they overlap when either string is a prefix of the other.
     * The prefix test is purely textual, not component-wise.
     *
     * @param txMtdPath transform metadata path
     * @param outputPath output path
     * @param fs file system used to qualify both paths
     * @throws DMLRuntimeException if the two paths overlap
     */
    private static void checkIfOutputOverlapsWithTxMtd(String txMtdPath, String outputPath, FileSystem fs) throws DMLRuntimeException {
        Path qualifiedTxMtd = new Path(txMtdPath).makeQualified(fs);
        Path qualifiedOutput = new Path(outputPath).makeQualified(fs);
        String txMtdText = qualifiedTxMtd.toString();
        String outputText = qualifiedOutput.toString();

        boolean siblings = qualifiedTxMtd.getParent().toString().equals(qualifiedOutput.getParent().toString());
        boolean overlapping;
        if (siblings) {
            overlapping = txMtdText.equals(outputText);
        } else {
            overlapping = txMtdText.startsWith(outputText) || outputText.startsWith(txMtdText);
        }
        if (overlapping) {
            throw new DMLRuntimeException("The transform path '" + txMtdPath + "' cannot overlap with the output path '" + outputPath + "'");
        }
    }

    /**
     * Runs the transform on Spark.
     *
     * Resolves header and column names from the smallest input file, obtains
     * the input as an RDD of CSV lines, prepares the specification (generating
     * metadata with {@code GenTfMtdSPARK} unless in apply mode, where the
     * given metadata is copied instead), applies the transformation with
     * {@code ApplyTfCSVSPARK} and attaches the resulting RDD to
     * {@code outputs[0]}.
     *
     * In apply mode the row count passed to {@code setDimension} stays 0,
     * because it is only set by the metadata-generation job.
     *
     * @param inst instruction supplying the transform parameters
     * @param inputs input frame objects; only {@code inputs[0]} is used
     * @param outputs output matrix objects; only {@code outputs[0]} is used
     * @param ec execution context; must be a {@code SparkExecutionContext}
     * @throws Exception if any step fails
     */
    public static void spDataTransform(ParameterizedBuiltinSPInstruction inst, FrameObject[] inputs, MatrixObject[] outputs, ExecutionContext ec) throws Exception {
        SparkExecutionContext sec = (SparkExecutionContext)ec;
        TransformOperands oprnds = new TransformOperands(inst.getParams(), (CacheableData)inputs[0]);
        JobConf job = new JobConf();
        FileSystem fs = FileSystem.get((Configuration)job);
        DataTransform.checkIfOutputOverlapsWithTxMtd(oprnds.txMtdPath, outputs[0].getFileName(), fs);

        String smallestFile = CSVReblockMR.findSmallestFile(job, oprnds.inputPath);
        String headerLine = DataTransform.readHeaderLine(fs, oprnds.inputCSVProperties, smallestFile);
        HashMap<String, Integer> colNamesToIds = DataTransform.processColumnNames(fs, oprnds.inputCSVProperties, headerLine, smallestFile);
        int numColumns = colNamesToIds.size();
        String outHeader = DataTransform.getOutputHeader(fs, headerLine, oprnds);
        String tmpPath = MRJobConfiguration.constructTempOutputFilename();

        JavaPairRDD<?, ?> inputData = sec.getRDDHandleForFrameObject(inputs[0], InputInfo.CSVInputInfo);
        JavaRDD csvLines = JavaPairRDD.toRDD(inputData).toJavaRDD();

        final boolean applyOnly = oprnds.isApply;
        long numRowsTf = 0L;
        String specWithIDs;
        String partOffsetsFile = null;
        if (applyOnly) {
            // The column-name map is not needed any more; drop the reference.
            colNamesToIds = null;
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(oprnds.txMtdPath), job);
            MapReduceTool.copyFileOnHDFS(oprnds.applyTxPath, oprnds.txMtdPath);
            if (oprnds.spec != null) {
                specWithIDs = oprnds.spec;
            } else {
                specWithIDs = MapReduceTool.readStringFromHDFSFile(oprnds.txMtdPath + "/spec.json");
            }
        } else {
            specWithIDs = DataTransform.processSpecFile(fs, oprnds.inputPath, smallestFile, colNamesToIds, oprnds.inputCSVProperties, oprnds.spec);
            colNamesToIds = null;
            partOffsetsFile = MRJobConfiguration.constructTempOutputFilename();
            numRowsTf = GenTfMtdSPARK.runSparkJob(sec, (JavaRDD<Tuple2<LongWritable, Text>>)csvLines, oprnds.txMtdPath, specWithIDs, partOffsetsFile, oprnds.inputCSVProperties, numColumns, outHeader);
            MapReduceTool.writeStringToHDFS(specWithIDs, oprnds.txMtdPath + "/spec.json");
        }

        // Shared by both modes: determine the output width and apply the transformation.
        long numColumnsTf = DataTransform.getNumColumnsTf(fs, outHeader, oprnds.inputCSVProperties.getDelim(), oprnds.txMtdPath);
        JavaPairRDD<Long, String> tfPairRDD = ApplyTfCSVSPARK.runSparkJob(sec, (JavaRDD<Tuple2<LongWritable, Text>>)csvLines, oprnds.txMtdPath, specWithIDs, tmpPath, oprnds.inputCSVProperties, numColumns, outHeader);
        if (!applyOnly) {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(partOffsetsFile), job);
        }
        DataTransform.moveFilesFromTmp(fs, tmpPath, oprnds.txMtdPath);

        JavaPairRDD<LongWritable, Text> outtfPairRDD = RDDConverterUtils.stringToSerializableText(tfPairRDD);
        if (outtfPairRDD == null) {
            return;
        }
        MatrixObject target = outputs[0];
        String targetVar = target.getVarName();
        target.setRDDHandle(new RDDObject(outtfPairRDD, targetVar));
        sec.addLineageRDD(targetVar, inst.getParams().get("target"));
        MatrixCharacteristics outChars = sec.getMatrixCharacteristics(targetVar);
        outChars.setDimension(numRowsTf, numColumnsTf);
        outChars.setNonZeros(-1L);
    }

    /**
     * Holder for the operands of a transform call: input path and CSV
     * properties, metadata path, optional apply path, optional specification
     * and optional output-names file.
     */
    private static class TransformOperands {
        private String inputPath;
        private String txMtdPath;
        private String applyTxPath;
        private String spec;
        private String outNamesFile;
        private boolean isApply;
        private CSVFileFormatProperties inputCSVProperties;

        /**
         * Parses the operands from an instruction string whose fields are
         * separated by {@code \u00b0}.
         *
         * Field 3 is the metadata path. Field 4 is the apply path if it starts
         * with {@code applymtd=}. The next field, if present and prefixed with
         * {@code spec=}, is the specification; the field after that (or the
         * same field when it was not a specification), if prefixed with
         * {@code outnames=}, is the output-names file.
         *
         * @param inst instruction string
         * @param input input data object supplying file name and CSV properties
         */
        private TransformOperands(String inst, CacheableData<?> input) {
            this.inputPath = input.getFileName();
            this.inputCSVProperties = (CSVFileFormatProperties)input.getFileFormatProperties();

            String[] fields = inst.split("\u00b0");
            this.txMtdPath = fields[3];
            if (fields[4].startsWith("applymtd=")) {
                this.applyTxPath = fields[4].substring(9);
            }
            this.isApply = this.applyTxPath != null;

            // Index of the first optional field after the (optional) apply path.
            int next = this.isApply ? 5 : 4;
            if (next < fields.length && fields[next].startsWith("spec=")) {
                this.spec = fields[next].substring(5);
                ++next;
            }
            if (next < fields.length && fields[next].startsWith("outnames=")) {
                this.outNamesFile = fields[next].substring(9);
            }
        }

        /**
         * Reads the operands from a parameter map using the keys
         * {@code transformPath}, {@code spec}, {@code applyTransformPath}
         * and {@code outputNames}.
         *
         * @param params parameter map of the instruction
         * @param input input data object supplying file name and CSV properties
         */
        private TransformOperands(HashMap<String, String> params, CacheableData<?> input) {
            this.inputPath = input.getFileName();
            this.txMtdPath = params.get("transformPath");
            this.spec = params.get("spec");
            this.applyTxPath = params.get("applyTransformPath");
            this.outNamesFile = params.get("outputNames");
            this.isApply = this.applyTxPath != null;
            this.inputCSVProperties = (CSVFileFormatProperties)input.getFileFormatProperties();
        }
    }
}

