一、背景

在List<Map<String,Object>>集合中,需要根据每一个元素中指定的key对应值进行去重。
类似的,List&lt;User&gt;这种对象集合中,也可能会有根据User类的某个字段进行去重的场景。

以下是mock的数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/**
 * Builds the mock data set for the examples: five maps carrying "id"/"value"
 * entries. Three of them share id = 3, and the last two also carry an "extra"
 * entry, so de-duplication by "id" keeps only the first of the three.
 */
private static List<Map<String, Object>> getDataList()
{
    Map<String, Object> first = Maps.newHashMap();
    first.put("id", 1);
    first.put("value", 123);

    Map<String, Object> second = Maps.newHashMap();
    second.put("id", 2);
    second.put("value", 456);

    Map<String, Object> third = Maps.newHashMap();
    third.put("id", 3);
    third.put("value", 123);

    Map<String, Object> fourth = Maps.newHashMap();
    fourth.put("id", 3);
    fourth.put("value", 789);
    fourth.put("extra", "aaa");

    Map<String, Object> fifth = Maps.newHashMap();
    fifth.put("id", 3);
    fifth.put("value", 789);
    fifth.put("extra", "aaaa");

    return Lists.newArrayList(first, second, third, fourth, fifth);
}

二、解法

2.1 普通解法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * De-duplicates dataList by the value stored under the given key.
 * Keeps the first occurrence of each key value and preserves encounter order.
 *
 * @param dataList rows to de-duplicate; an empty result is returned when empty
 * @param key      the map key whose value defines uniqueness
 * @return first occurrence of each distinct key value, in input order
 */
private List<Map<String, Object>> distinctByKey1(List<Map<String, Object>> dataList, String key)
{
    if (CollectionUtils.isEmpty(dataList) || StringUtils.isEmpty(key))
    {
        return Lists.newArrayList();
    }
    // LinkedHashMap keeps insertion order; putIfAbsent keeps the FIRST row seen
    // for each key value (same as toMap's (oldValue, newValue) -> oldValue merge).
    Map<Object, Map<String, Object>> firstSeen = new LinkedHashMap<>();
    for (Map<String, Object> row : dataList)
    {
        firstSeen.putIfAbsent(row.get(key), row);
    }
    return firstSeen.values().stream().collect(Collectors.toList());
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList = distinctByKey1(dataList, key);
System.out.println(distinctList);

2.2 新构造Wrapper类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
private class DuplicateWrapper
{
private final Map<String, Object> data;

private final String distinctKey;

public DuplicateWrapper(Map<String, Object> data, String distinctKey)
{
this.data = data;
this.distinctKey = distinctKey;
}

public Map<String, Object> getData()
{
return data;
}

@Override
public boolean equals(Object o)
{
if (this == o)
{
return true;
}
if (o == null || getClass() != o.getClass())
{
return false;
}
DuplicateWrapper that = (DuplicateWrapper)o;
return Objects.equals(data.get(distinctKey), that.data.get(distinctKey));
}

@Override
public int hashCode()
{
return Objects.hash(data.get(distinctKey));
}
}

/**
 * De-duplicates dataList by the value under the given key, by wrapping each row
 * in a DuplicateWrapper (whose equals/hashCode delegate to that value) and using
 * Stream#distinct(). Keeps the first occurrence, in encounter order.
 *
 * Guards against empty input / blank key to match distinctByKey1's contract
 * (the original threw NullPointerException on a null list).
 *
 * @param dataList rows to de-duplicate
 * @param key      the map key whose value defines uniqueness
 * @return first occurrence of each distinct key value, in input order
 */
private List<Map<String, Object>> distinctByKey2(List<Map<String, Object>> dataList, String key)
{
    if (CollectionUtils.isEmpty(dataList) || StringUtils.isEmpty(key))
    {
        return Lists.newArrayList();
    }
    return dataList.stream()
        .map(row -> new DuplicateWrapper(row, key))   // "row", not "list": elements are Maps
        .distinct()
        .map(DuplicateWrapper::getData)
        .collect(Collectors.toList());
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList2 = distinctByKey2(dataList, key);
System.out.println(distinctList2);

2.3 借助Predicate + ConcurrentHashMap

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Returns a stateful predicate for Stream#filter that accepts an element only the
 * first time its extracted key is seen. Backed by a concurrent map, so the
 * extracted key must not be null (see note in section 3).
 */
private static <T> Predicate<T> distinctByKey3(Function<? super T, ?> keyExtractor)
{
    final Map<Object, Boolean> seen = Maps.newConcurrentMap();
    return element -> {
        final Object extracted = keyExtractor.apply(element);
        // putIfAbsent returns null exactly once per distinct key: the first sighting.
        return seen.putIfAbsent(extracted, Boolean.TRUE) == null;
    };
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList3 = dataList.stream()
.filter(distinctByKey3(data -> data.get(key)))
.collect(Collectors.toList());
System.out.println(distinctList3);

2.4 借助Predicate + ConcurrentHashSet

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Returns a stateful predicate for Stream#filter that accepts an element only the
 * first time its extracted key is seen. Uses a ConcurrentHashMap-backed set, so
 * the extracted key must not be null (see note in section 3).
 */
private static <T> Predicate<T> distinctByKey4(Function<? super T, ?> keyExtractor)
{
    final Set<Object> seen = ConcurrentHashMap.newKeySet();
    return element -> {
        final Object extracted = keyExtractor.apply(element);
        // Set#add is true exactly once per distinct key: the first sighting.
        return seen.add(extracted);
    };
}

// Usage in main():
List<Map<String, Object>> dataList = getDataList();
String key = "id";
List<Map<String, Object>> distinctList4 = dataList.stream()
.filter(distinctByKey4(data -> data.get(key)))
.collect(Collectors.toList());
System.out.println(distinctList4);

2.5 如果需要根据多个fields进行去重

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Returns a stateful predicate that accepts an element only the first time the
 * combination of ALL extracted key values is seen.
 *
 * The extracted values are collected into a List that acts as the composite map
 * key; since the List itself is never null, null field values are tolerated here
 * (unlike the single-key ConcurrentMap variants).
 *
 * With zero extractors every element maps to the empty list, so only the first
 * element survives.
 */
@SafeVarargs // the varargs array is only read, never stored into — no heap pollution
private static <T> Predicate<T> distinctByMultiFields(Function<? super T, ?>... keyExtractor)
{
    final Map<List<?>, Boolean> seen = Maps.newConcurrentMap();
    return element -> {
        final List<?> compositeKey = Arrays.stream(keyExtractor)
            .map(extractor -> extractor.apply(element))
            .collect(Collectors.toList());
        return seen.putIfAbsent(compositeKey, Boolean.TRUE) == null;
    };
}

// Usage in main() (dataList and key as defined in the earlier examples):
List<Map<String, Object>> distinctList5 = dataList.stream()
.filter(distinctByMultiFields(data -> data.get(key), data -> data.get("value")))
.collect(Collectors.toList());

2.6 对象场景

也是类似的,以2.5中的方法为例

1
2
3
4
5
6
7
8
9
// De-duplicating a list of objects by (id, name); requires a User class
// exposing getId/getName. user3 repeats user1's id and name, so it is dropped.
User user1 = new User("id1", "name1", "email1");
User user2 = new User("id2", "name2", "email1");
User user3 = new User("id1", "name1", "email3");
List<User> userList = Lists.newArrayList(user1, user2, user3);
List<User> distinctUserList = userList.stream()
.filter(distinctByMultiFields(User::getId,User::getName))
.collect(Collectors.toList());
System.out.println(distinctUserList);
// expected output: [User(id=id1, name=name1, email=email1), User(id=id2, name=name2, email=email1)]

三、说明

  1. 需要保证key在Map中存在且对应的value不为null,否则2.3、2.4中的方法会抛NPE,因为ConcurrentHashMap(及其keySet)的key不能为null;2.5中各字段值被收集进List作为组合key,而List允许null元素,因此2.5不受此限制。