/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.exec.tez;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.tez.ColumnarSplitSizeEstimator;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.split.SplitLocationProvider;
import org.apache.hadoop.mapred.split.SplitSizeEstimator;
import org.apache.hadoop.mapred.split.TezGroupedSplit;
import org.apache.hadoop.mapred.split.TezMapredSplitsGrouper;
import org.apache.tez.dag.api.TaskLocationHint;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SplitGrouper {
    private static final Logger LOG = LoggerFactory.getLogger(SplitGrouper.class);
    private final TezMapredSplitsGrouper tezGrouper = new TezMapredSplitsGrouper();
    private final Map<Path, Path> cache = Maps.newHashMap();

    /**
     * Groups the splits of each bucket separately with the Tez splits grouper.
     *
     * <p>The desired number of grouped splits per bucket comes from
     * {@code estimateBucketSizes}. The wrapped input format name handed to the
     * grouper is the class configured under {@code mapred.input.format.class}
     * when that is {@link HiveInputFormat} or {@link BucketizedHiveInputFormat};
     * any other configured class is replaced by {@link HiveInputFormat}.
     *
     * @param conf configuration passed to the grouper and read for the input format class
     * @param bucketSplitMultimap splits keyed by bucket id
     * @param availableSlots number of slots used for the task estimate
     * @param waves multiplier applied to {@code availableSlots} for the task estimate
     * @param splitLocationProvider location provider forwarded to the grouper
     * @return the grouped splits, keyed by the same bucket ids as the input
     * @throws IOException if the grouper fails
     */
    public Multimap<Integer, InputSplit> group(Configuration conf, Multimap<Integer, InputSplit> bucketSplitMultimap, int availableSlots, float waves, SplitLocationProvider splitLocationProvider) throws IOException {
        Map<Integer, Integer> bucketTaskMap = this.estimateBucketSizes(availableSlots, waves, bucketSplitMultimap.asMap());
        Multimap<Integer, InputSplit> bucketGroupedSplitMultimap = ArrayListMultimap.create();
        for (int bucketId : bucketSplitMultimap.keySet()) {
            Collection<InputSplit> inputSplitCollection = bucketSplitMultimap.get(bucketId);

            // Only the two Hive input formats are accepted as wrapped format; anything else falls back.
            Class<?> configuredFormat = conf.getClass("mapred.input.format.class", HiveInputFormat.class);
            boolean supportedFormat = configuredFormat == BucketizedHiveInputFormat.class || configuredFormat == HiveInputFormat.class;
            String wrappedFormatName = supportedFormat ? configuredFormat.getName() : HiveInputFormat.class.getName();

            InputSplit[] rawSplits = inputSplitCollection.toArray(new InputSplit[0]);
            int desiredSplits = bucketTaskMap.get(bucketId);
            InputSplit[] groupedSplits = this.tezGrouper.getGroupedSplits(conf, rawSplits, desiredSplits, wrappedFormatName, new ColumnarSplitSizeEstimator(), splitLocationProvider);
            LOG.info("Original split count is " + rawSplits.length + " grouped split count is " + groupedSplits.length + ", for bucket: " + bucketId);
            for (InputSplit inSplit : groupedSplits) {
                bucketGroupedSplitMultimap.put(bucketId, inSplit);
            }
        }
        return bucketGroupedSplitMultimap;
    }

    /**
     * Builds one {@link TaskLocationHint} per split, in the order of {@code splits}.
     *
     * <ul>
     *   <li>A {@link TezGroupedSplit} with a non-null rack yields a rack-only hint.</li>
     *   <li>A split with no locations (null or empty) yields a hint with neither nodes nor racks.</li>
     *   <li>With {@code consistentLocations}, a {@link FileSplit} with more than one location gets
     *       its locations sorted and then rotated by a start index derived from the hash of the
     *       file path and start offset.</li>
     *   <li>Otherwise the locations are used in the order reported by the split.</li>
     * </ul>
     *
     * @param splits splits to create hints for
     * @param consistentLocations whether to apply the sorted-and-rotated ordering for file splits
     * @return the location hints, one per input split
     * @throws IOException if a split fails to report its locations
     */
    public List<TaskLocationHint> createTaskLocationHints(InputSplit[] splits, boolean consistentLocations) throws IOException {
        List<TaskLocationHint> locationHints = Lists.newArrayListWithCapacity(splits.length);
        for (InputSplit split : splits) {
            String rack = split instanceof TezGroupedSplit ? ((TezGroupedSplit) split).getRack() : null;
            if (rack != null) {
                locationHints.add(TaskLocationHint.createTaskLocationHint(null, Collections.singleton(rack)));
                continue;
            }

            String[] locations = split.getLocations();
            if (locations == null || locations.length == 0) {
                locationHints.add(TaskLocationHint.createTaskLocationHint(null, null));
                continue;
            }

            if (consistentLocations && locations.length > 1 && split instanceof FileSplit) {
                FileSplit fileSplit = (FileSplit) split;
                // Sorts the array returned by getLocations() in place so the rotation below is stable.
                Arrays.sort(locations);
                Path path = fileSplit.getPath();
                long startLocation = fileSplit.getStart();
                int hashCode = Objects.hash(path, startLocation);
                int count = locations.length;
                // The hash may be negative and Java's % keeps the dividend's sign, which would
                // produce a negative array index; normalise the start index into [0, count).
                int startIndex = ((hashCode % count) + count) % count;
                LinkedHashSet<String> locationSet = new LinkedHashSet<>(count);
                // Walk the sorted array from startIndex, wrapping around at the end.
                for (int i = 0; i < count; ++i) {
                    locationSet.add(locations[(startIndex + i) % count]);
                }
                locationHints.add(TaskLocationHint.createTaskLocationHint(locationSet, null));
            } else {
                locationHints.add(TaskLocationHint.createTaskLocationHint(new LinkedHashSet<>(Arrays.asList(locations)), null));
            }
        }
        return locationHints;
    }

    /**
     * Convenience overload that delegates to the eight-argument
     * {@code generateGroupedSplits} with a {@code null} input name and with
     * grouping across files enabled.
     *
     * @throws Exception whatever the delegated call throws
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration conf, InputSplit[] splits, float waves, int availableSlots, SplitLocationProvider locationProvider) throws Exception {
        final String noInputName = null;
        final boolean acrossFiles = true;
        return generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, noInputName, acrossFiles, locationProvider);
    }

    /**
     * Partitions {@code splits} into groups and returns the grouped splits per group id.
     *
     * <p>When the split grouping mode read from {@code jobConf} is {@code "compactor"}
     * (case-insensitive), every input path that maps to exactly one alias backed by a
     * {@link TableScanOperator} is checked: the scan must be on a transactional table or
     * the partition's table must be a compaction table, otherwise a
     * {@link RuntimeException} is thrown. The splits are then grouped by
     * {@code getCompactorSplitGroups}.
     *
     * <p>In any other mode the splits are walked in order; a new group is started each
     * time {@code schemaEvolved} reports a change relative to the first split of the
     * current group, and the resulting groups are handed to {@code group}.
     *
     * @param jobConf job configuration; source of the map work, the grouping mode, and the
     *        configuration passed to {@code group}
     * @param conf configuration passed to {@code getCompactorSplitGroups}
     * @param splits splits to group
     * @param waves multiplier forwarded to {@code group}
     * @param availableSlots slot count forwarded to {@code group}
     * @param inputName name used to look up merge work; may be {@code null}
     * @param groupAcrossFiles forwarded to {@code schemaEvolved}
     * @param locationProvider location provider forwarded to {@code group}
     * @return grouped splits keyed by group id
     * @throws Exception if grouping fails
     */
    public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration conf, InputSplit[] splits, float waves, int availableSlots, String inputName, boolean groupAcrossFiles, SplitLocationProvider locationProvider) throws Exception {
        MapWork mapWork = SplitGrouper.populateMapWork(jobConf, inputName);

        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.SPLIT_GROUPING_MODE).equalsIgnoreCase("compactor")) {
            boolean isMinorCompaction = true;
            List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
            for (Path path : paths) {
                List<String> aliases = mapWork.getPathToAliases().get(path);
                if (aliases == null || aliases.size() != 1) {
                    continue;
                }
                String alias = aliases.get(0);
                Operator<? extends OperatorDesc> op = mapWork.getAliasToWork().get(alias);
                if (!(op instanceof TableScanOperator)) {
                    continue;
                }
                TableScanOperator tableScan = (TableScanOperator) op;
                PartitionDesc partitionDesc = mapWork.getAliasToPartnInfo().get(alias);
                if (((TableScanDesc) tableScan.getConf()).isTranscationalTable()) {
                    continue;
                }
                // Only consulted for non-transactional scans; once false it stays false.
                isMinorCompaction &= AcidUtils.isCompactionTable(partitionDesc.getTableDesc().getProperties());
                if (isMinorCompaction) {
                    continue;
                }
                String splitPath = this.getFirstSplitPath(splits);
                String errorMessage = "Compactor split grouping is enabled only for transactional tables. Please check the path: " + splitPath;
                LOG.error(errorMessage);
                throw new RuntimeException(errorMessage);
            }
            return this.getCompactorSplitGroups(splits, conf, isMinorCompaction);
        }

        Multimap<Integer, InputSplit> schemaGroupedSplitMultiMap = ArrayListMultimap.create();
        int groupId = 0;
        InputSplit groupLeader = null;
        for (InputSplit s : splits) {
            if (this.schemaEvolved(s, groupLeader, groupAcrossFiles, mapWork)) {
                ++groupId;
                groupLeader = s;
            }
            schemaGroupedSplitMultiMap.put(groupId, s);
        }
        // Report the number of groups actually created; the id counter is bumped before the
        // first split is stored, so "counter + 1" overstated the count.
        LOG.info("# Src groups for split generation: " + schemaGroupedSplitMultiMap.keySet().size());
        return this.group(jobConf, schemaGroupedSplitMultiMap, availableSlots, waves, locationProvider);
    }

    /**
     * Returns the path of the first split as a string.
     *
     * @param splits splits to inspect; the first element is cast to {@link FileSplit}
     * @return string form of the first split's path
     * @throws RuntimeException if {@code splits} is empty
     */
    private String getFirstSplitPath(InputSplit[] splits) {
        if (splits.length > 0) {
            FileSplit first = (FileSplit) splits[0];
            return first.getPath().toString();
        }
        throw new RuntimeException("The list of splits provided for grouping is empty.");
    }

    /**
     * Groups compactor splits so that all splits sharing a bucket id end up in one
     * {@link TezGroupedSplit}.
     *
     * <p>Each raw split is cast to {@code HiveInputFormat.HiveInputSplit} wrapping an
     * {@link OrcSplit}, and the ORC split is parsed first (against the parent of its root
     * directory for minor compaction, with the plain {@code parse(conf)} otherwise). The
     * splits are then sorted with {@link ComparatorCompactor} and walked in order; a new
     * grouped split is opened whenever the bucket id changes.
     *
     * @param rawSplits splits to group
     * @param conf configuration passed to {@code OrcSplit.parse}
     * @param isMinorCompaction selects which {@code parse} variant is used and disables the
     *        same-root-directory assertion
     * @return grouped splits keyed by bucket id
     * @throws RuntimeException if parsing a split fails with an {@link IOException}
     */
    Multimap<Integer, InputSplit> getCompactorSplitGroups(InputSplit[] rawSplits, Configuration conf, boolean isMinorCompaction) {
        Multimap<Integer, InputSplit> bucketSplitMultiMap = ArrayListMultimap.create();
        HiveInputFormat.HiveInputSplit[] splits = new HiveInputFormat.HiveInputSplit[rawSplits.length];
        for (int i = 0; i < rawSplits.length; ++i) {
            HiveInputFormat.HiveInputSplit hiveInputSplit = (HiveInputFormat.HiveInputSplit) rawSplits[i];
            OrcSplit orcSplit = (OrcSplit) hiveInputSplit.getInputSplit();
            try {
                if (isMinorCompaction) {
                    orcSplit.parse(conf, orcSplit.getRootDir().getParent());
                } else {
                    orcSplit.parse(conf);
                }
            } catch (IOException e) {
                // Keep the cause so the underlying I/O failure is visible to the caller.
                throw new RuntimeException("Failed to parse split " + orcSplit, e);
            }
            splits[i] = hiveInputSplit;
        }
        Arrays.sort(splits, new ComparatorCompactor());

        TezGroupedSplit tgs = null;
        int previousWriterId = Integer.MIN_VALUE;
        Path rootDir = null;
        for (HiveInputFormat.HiveInputSplit split : splits) {
            OrcSplit orcSplit = (OrcSplit) split.getInputSplit();
            int writerId = orcSplit.getBucketId();
            if (!isMinorCompaction) {
                if (rootDir == null) {
                    rootDir = orcSplit.getRootDir();
                }
                Path rootDirFromCurrentSplit = orcSplit.getRootDir();
                assert rootDir.equals(rootDirFromCurrentSplit);
            }
            // The null check covers a first bucket id equal to the Integer.MIN_VALUE sentinel.
            if (tgs == null || writerId != previousWriterId) {
                tgs = new TezGroupedSplit(1, "org.apache.hadoop.hive.ql.io.HiveInputFormat", null, null);
                bucketSplitMultiMap.put(writerId, tgs);
            }
            tgs.addSplit(split);
            previousWriterId = writerId;
        }
        return bucketSplitMultiMap;
    }

    /**
     * Estimates how many tasks each bucket should be grouped into.
     *
     * <p>When every split is a {@link FileSplit}, a bucket's share is
     * {@code availableSlots * waves} scaled by the bucket's byte length relative to the
     * total byte length, with a minimum of one task. When any split is not a
     * {@link FileSplit}, sizes are not used and every bucket is assigned
     * {@code (int) (availableSlots * waves)} tasks.
     *
     * @param availableSlots number of slots available
     * @param waves multiplier applied to {@code availableSlots}
     * @param bucketSplitMap splits keyed by bucket id
     * @return estimated task count for every bucket id in {@code bucketSplitMap}
     */
    private Map<Integer, Integer> estimateBucketSizes(int availableSlots, float waves, Map<Integer, Collection<InputSplit>> bucketSplitMap) {
        Map<Integer, Long> bucketSizeMap = new HashMap<Integer, Long>();
        Map<Integer, Integer> bucketTaskMap = new HashMap<Integer, Integer>();
        long totalSize = 0L;
        boolean sizesUnknown = false;
        for (Map.Entry<Integer, Collection<InputSplit>> bucket : bucketSplitMap.entrySet()) {
            long size = 0L;
            for (InputSplit s : bucket.getValue()) {
                if (!(s instanceof FileSplit)) {
                    sizesUnknown = true;
                    break;
                }
                long length = ((FileSplit) s).getLength();
                size += length;
                totalSize += length;
            }
            if (sizesUnknown) {
                break;
            }
            bucketSizeMap.put(bucket.getKey(), size);
        }

        if (sizesUnknown) {
            // Give every bucket the default, not only the ones that contained a non-file
            // split; callers look up each bucket id and would otherwise get null.
            int defaultTasks = (int) ((float) availableSlots * waves);
            for (Integer bucketId : bucketSplitMap.keySet()) {
                bucketTaskMap.put(bucketId, defaultTasks);
            }
            return bucketTaskMap;
        }

        for (Map.Entry<Integer, Long> bucket : bucketSizeMap.entrySet()) {
            int numEstimatedTasks = 0;
            if (totalSize != 0L) {
                numEstimatedTasks = (int) ((float) availableSlots * waves * (float) bucket.getValue().longValue() / (float) totalSize);
            }
            LOG.info("Estimated number of tasks: " + numEstimatedTasks + " for bucket " + bucket.getKey());
            if (numEstimatedTasks == 0) {
                numEstimatedTasks = 1;
            }
            bucketTaskMap.put(bucket.getKey(), numEstimatedTasks);
        }
        return bucketTaskMap;
    }

    /**
     * Resolves the {@link MapWork} for the job: the merge work registered under
     * {@code inputName} when a name is given and such work exists, otherwise the
     * job's map work.
     *
     * @param jobConf job configuration to read the work from
     * @param inputName merge work name, or {@code null} to skip the merge work lookup
     * @return the resolved work, as returned by {@code Utilities}
     */
    private static MapWork populateMapWork(JobConf jobConf, String inputName) {
        MapWork mergeWork = inputName == null ? null : (MapWork) Utilities.getMergeWork(jobConf, inputName);
        return mergeWork != null ? mergeWork : Utilities.getMapWork(jobConf);
    }

    /**
     * Decides whether split {@code s} must start a new group relative to {@code prevSplit}.
     *
     * <p>Both splits are cast to {@link FileSplit}. When a previous split exists and
     * {@code groupAcrossFiles} is false, the answer is simply whether the two paths differ.
     * Otherwise the partition descriptors of both paths are compared: a different input
     * format class or deserializer class name means a new group. With no previous split the
     * comparison is made against {@code null} values.
     *
     * @param s current split
     * @param prevSplit split to compare against, or {@code null}
     * @param groupAcrossFiles whether splits of different files may share a group
     * @param work map work supplying the path-to-partition mapping
     * @return {@code true} if {@code s} should open a new group
     * @throws IOException if the partition descriptor lookup fails
     */
    private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles, MapWork work) throws IOException {
        Path currentPath = ((FileSplit) s).getPath();
        PartitionDesc currentDesc = HiveFileFormatUtils.getFromPathRecursively(work.getPathToPartitionInfo(), currentPath, this.cache);
        String currentSerde = currentDesc.getDeserializerClassName();
        Class<? extends InputFormat> currentFormat = currentDesc.getInputFileFormatClass();

        String priorSerde = null;
        Class<? extends InputFormat> priorFormat = null;
        if (prevSplit != null) {
            Path priorPath = ((FileSplit) prevSplit).getPath();
            if (!groupAcrossFiles) {
                // File-level grouping: only the path matters, and this exit is not logged.
                return !currentPath.equals(priorPath);
            }
            PartitionDesc priorDesc = HiveFileFormatUtils.getFromPathRecursively(work.getPathToPartitionInfo(), priorPath, this.cache);
            priorSerde = priorDesc.getDeserializerClassName();
            priorFormat = priorDesc.getInputFileFormatClass();
        }

        boolean unchanged = currentFormat == priorFormat && currentSerde.equals(priorSerde);
        boolean evolved = !unchanged;
        LOG.debug("Adding split {} to src new group? {}", currentPath, evolved);
        return evolved;
    }

    /**
     * Orders compactor splits by the wrapped {@link OrcSplit}'s bucket id, then write id,
     * then statement id, then synthetic row id offset (0 when the split has no synthetic
     * ACID properties), then start offset. Two distinct splits that tie on every key are
     * rejected with a {@link RuntimeException}.
     */
    static class ComparatorCompactor
    implements Comparator<HiveInputFormat.HiveInputSplit>,
    Serializable {
        ComparatorCompactor() {
        }

        @Override
        public int compare(HiveInputFormat.HiveInputSplit h1, HiveInputFormat.HiveInputSplit h2) {
            if (h1 == h2) {
                return 0;
            }
            OrcSplit left = (OrcSplit) h1.getInputSplit();
            OrcSplit right = (OrcSplit) h2.getInputSplit();

            // Each key is only consulted when all earlier keys tie.
            int order = Long.compare(left.getBucketId(), right.getBucketId());
            if (order == 0) {
                order = Long.compare(left.getWriteId(), right.getWriteId());
            }
            if (order == 0) {
                order = Long.compare(left.getStatementId(), right.getStatementId());
            }
            if (order == 0) {
                long leftOffset = left.getSyntheticAcidProps() == null ? 0L : left.getSyntheticAcidProps().getRowIdOffset();
                long rightOffset = right.getSyntheticAcidProps() == null ? 0L : right.getSyntheticAcidProps().getRowIdOffset();
                order = Long.compare(leftOffset, rightOffset);
            }
            if (order == 0) {
                order = Long.compare(left.getStart(), right.getStart());
            }
            if (order == 0) {
                throw new RuntimeException("Found 2 equal splits: " + left + " and " + right);
            }
            return order;
        }
    }
}

