一、背景

在List<Map<String,Object>>集合中,需要根据每一个元素中指定的key对应值进行去重。
类似的,List&lt;User&gt;这种对象集合中,也可能会有根据User类的某个字段进行去重的场景。

以下是mock的数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/**
 * Builds the mock data set for the examples: five maps carrying "id"/"value"
 * entries. Three of them share id = 3, and the last two also carry an "extra"
 * entry, so de-duplication by "id" keeps only the first of the three.
 */
private static List<Map<String, Object>> getDataList()
{
    Map<String, Object> first = Maps.newHashMap();
    first.put("id", 1);
    first.put("value", 123);

    Map<String, Object> second = Maps.newHashMap();
    second.put("id", 2);
    second.put("value", 456);

    Map<String, Object> third = Maps.newHashMap();
    third.put("id", 3);
    third.put("value", 123);

    Map<String, Object> fourth = Maps.newHashMap();
    fourth.put("id", 3);
    fourth.put("value", 789);
    fourth.put("extra", "aaa");

    Map<String, Object> fifth = Maps.newHashMap();
    fifth.put("id", 3);
    fifth.put("value", 789);
    fifth.put("extra", "aaaa");

    return Lists.newArrayList(first, second, third, fourth, fifth);
}

二、解法

2.1 普通解法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * De-duplicates dataList by the value stored under the given key.
 * Keeps the first occurrence of each key value and preserves encounter order.
 *
 * @param dataList rows to de-duplicate; an empty result is returned when empty
 * @param key      the map key whose value defines uniqueness
 * @return first occurrence of each distinct key value, in input order
 */
private List<Map<String, Object>> distinctByKey1(List<Map<String, Object>> dataList, String key)
{
    if (CollectionUtils.isEmpty(dataList) || StringUtils.isEmpty(key))
    {
        return Lists.newArrayList();
    }
    // LinkedHashMap keeps insertion order; putIfAbsent keeps the FIRST row seen
    // for each key value (same as toMap's (oldValue, newValue) -> oldValue merge).
    Map<Object, Map<String, Object>> firstSeen = new LinkedHashMap<>();
    for (Map<String, Object> row : dataList)
    {
        firstSeen.putIfAbsent(row.get(key), row);
    }
    return firstSeen.values().stream().collect(Collectors.toList());
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList = distinctByKey1(dataList, key);
System.out.println(distinctList);

2.2 新构造Wrapper类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
private class DuplicateWrapper
{
private final Map<String, Object> data;

private final String distinctKey;

public DuplicateWrapper(Map<String, Object> data, String distinctKey)
{
this.data = data;
this.distinctKey = distinctKey;
}

public Map<String, Object> getData()
{
return data;
}

@Override
public boolean equals(Object o)
{
if (this == o)
{
return true;
}
if (o == null || getClass() != o.getClass())
{
return false;
}
DuplicateWrapper that = (DuplicateWrapper)o;
return Objects.equals(data.get(distinctKey), that.data.get(distinctKey));
}

@Override
public int hashCode()
{
return Objects.hash(data.get(distinctKey));
}
}

/**
 * De-duplicates dataList by the value under the given key, by wrapping each row
 * in a DuplicateWrapper (whose equals/hashCode delegate to that value) and using
 * Stream#distinct(). Keeps the first occurrence, in encounter order.
 *
 * Guards against empty input / blank key to match distinctByKey1's contract
 * (the original threw NullPointerException on a null list).
 *
 * @param dataList rows to de-duplicate
 * @param key      the map key whose value defines uniqueness
 * @return first occurrence of each distinct key value, in input order
 */
private List<Map<String, Object>> distinctByKey2(List<Map<String, Object>> dataList, String key)
{
    if (CollectionUtils.isEmpty(dataList) || StringUtils.isEmpty(key))
    {
        return Lists.newArrayList();
    }
    return dataList.stream()
        .map(row -> new DuplicateWrapper(row, key))   // "row", not "list": elements are Maps
        .distinct()
        .map(DuplicateWrapper::getData)
        .collect(Collectors.toList());
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList2 = distinctByKey2(dataList, key);
System.out.println(distinctList2);

2.3 借助Predicate + ConcurrentHashMap

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Returns a stateful predicate for Stream#filter that accepts an element only the
 * first time its extracted key is seen. Backed by a concurrent map, so the
 * extracted key must not be null (see note in section 3).
 */
private static <T> Predicate<T> distinctByKey3(Function<? super T, ?> keyExtractor)
{
    final Map<Object, Boolean> seen = Maps.newConcurrentMap();
    return element -> {
        final Object extracted = keyExtractor.apply(element);
        // putIfAbsent returns null exactly once per distinct key: the first sighting.
        return seen.putIfAbsent(extracted, Boolean.TRUE) == null;
    };
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList3 = dataList.stream()
.filter(distinctByKey3(data -> data.get(key)))
.collect(Collectors.toList());
System.out.println(distinctList3);

2.4 借助Predicate + ConcurrentHashSet

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Returns a stateful predicate for Stream#filter that accepts an element only the
 * first time its extracted key is seen. Uses a ConcurrentHashMap-backed set, so
 * the extracted key must not be null (see note in section 3).
 */
private static <T> Predicate<T> distinctByKey4(Function<? super T, ?> keyExtractor)
{
    final Set<Object> seen = ConcurrentHashMap.newKeySet();
    return element -> {
        final Object extracted = keyExtractor.apply(element);
        // Set#add is true exactly once per distinct key: the first sighting.
        return seen.add(extracted);
    };
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList4 = dataList.stream()
.filter(distinctByKey4(data -> data.get(key)))
.collect(Collectors.toList());
System.out.println(distinctList4);

2.5 如果需要根据多个fields进行去重

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Returns a stateful predicate that accepts an element only the first time the
 * combination of ALL extracted key values is seen.
 *
 * The extracted values are collected into a List that acts as the composite map
 * key; since the List itself is never null, null field values are tolerated here
 * (unlike the single-key ConcurrentMap variants).
 *
 * With zero extractors every element maps to the empty list, so only the first
 * element survives.
 */
@SafeVarargs // the varargs array is only read, never stored into — no heap pollution
private static <T> Predicate<T> distinctByMultiFields(Function<? super T, ?>... keyExtractor)
{
    final Map<List<?>, Boolean> seen = Maps.newConcurrentMap();
    return element -> {
        final List<?> compositeKey = Arrays.stream(keyExtractor)
            .map(extractor -> extractor.apply(element))
            .collect(Collectors.toList());
        return seen.putIfAbsent(compositeKey, Boolean.TRUE) == null;
    };
}

// Usage in main() (dataList and key as defined in the earlier examples):
List<Map<String, Object>> distinctList5 = dataList.stream()
.filter(distinctByMultiFields(data -> data.get(key), data -> data.get("value")))
.collect(Collectors.toList());

2.6 对象场景

也是类似的,以2.5中的方法为例

1
2
3
4
5
6
7
8
9
// De-duplicating a list of objects by (id, name); requires a User class
// exposing getId/getName. user3 repeats user1's id and name, so it is dropped.
User user1 = new User("id1", "name1", "email1");
User user2 = new User("id2", "name2", "email1");
User user3 = new User("id1", "name1", "email3");
List<User> userList = Lists.newArrayList(user1, user2, user3);
List<User> distinctUserList = userList.stream()
.filter(distinctByMultiFields(User::getId,User::getName))
.collect(Collectors.toList());
System.out.println(distinctUserList);
// expected output: [User(id=id1, name=name1, email=email1), User(id=id2, name=name2, email=email1)]

三、说明

  1. 需要保证key在Map中存在且对应的value不为null,否则2.3、2.4中的方法会抛NPE,因为ConcurrentHashMap(及其keySet)的key不能为null;2.5中各字段值被收集进List作为组合key,而List允许null元素,因此2.5不受此限制。