时间:2020-12-08 22:10:19 | 栏目:Python代码 | 点击:次
背景:有一个 list,其中每个元素都是 dict,需要根据某一个 key 的值进行去重(对每个 key 值只保留第一次出现的那个元素)。在本例中,用作去重依据的 key 是 'question'。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# created by fhqplzj on 2017/12/07 11:38 AM
"""Four ways to de-duplicate a list of dicts by their ``'question'`` key.

Every function takes an iterable of dicts that each contain a
``'question'`` key and returns a new list holding, for each distinct
question value, the first dict in which that value appeared.

The code avoids Python-3-only syntax so it keeps working on the Python 2
interpreter it was originally written for.
"""
from itertools import compress, groupby
from operator import itemgetter

import pandas as pd


def distinct(items):
    """De-duplicate ``items`` by ``'question'`` using a pandas DataFrame.

    :param items: iterable of dicts, each with a ``'question'`` key.
    :return: list with the first dict seen for every question value, in
        first-occurrence order.
    :raises KeyError: if a dict has no ``'question'`` key.
    """
    # Snapshot the input: it is walked twice (once for the keys, once as a
    # DataFrame column), which would silently exhaust a one-shot iterator.
    items = list(items)
    if not items:
        return []
    # Build a real list rather than ``map(...)``: on Python 3 ``map`` yields
    # a lazy iterator, which is not a usable DataFrame column.
    questions = [item['question'] for item in items]
    df = pd.DataFrame({'items': items, 'questions': questions})
    return df.drop_duplicates(['questions'])['items'].tolist()


def distinct2(items):
    """De-duplicate ``items`` by ``'question'`` with a set of seen keys.

    Pure-Python single pass; question values must be hashable because they
    are stored in a set.

    :param items: iterable of dicts, each with a ``'question'`` key.
    :return: list with the first dict seen for every question value, in
        first-occurrence order.
    :raises KeyError: if a dict has no ``'question'`` key.
    """
    exist_questions = set()
    result = []
    for item in items:
        question = item['question']
        if question not in exist_questions:
            exist_questions.add(question)
            result.append(item)
    return result


def distinct3(items):
    """De-duplicate ``items`` by ``'question'`` via sort + ``groupby``.

    Unlike the other variants the result is ordered by question value, not
    by first occurrence, and the question values must be mutually
    orderable so they can be sorted.

    :param items: iterable of dicts, each with a ``'question'`` key.
    :return: list with one dict per question value, sorted by question.
    :raises KeyError: if a dict has no ``'question'`` key.
    """
    key = itemgetter('question')
    # ``groupby`` only merges adjacent equal keys, so sort first.  ``sorted``
    # is stable, hence the first element of each group is still the first
    # occurrence from the original input.
    items = sorted(items, key=key)
    return [next(group) for _, group in groupby(items, key=key)]


def distinct4(items):
    """De-duplicate ``items`` by ``'question'`` using a pandas boolean mask.

    :param items: iterable of dicts, each with a ``'question'`` key.
    :return: list with the first dict seen for every question value, in
        first-occurrence order.
    :raises KeyError: if a dict has no ``'question'`` key.
    """
    # Snapshot the input: it is walked once for the keys and once more by
    # ``compress``.
    items = list(items)
    if not items:
        return []
    questions = [item['question'] for item in items]
    # ``duplicated()`` flags every repeat after the first occurrence;
    # inverting it yields a keep-mask aligned with ``items``.
    mask = (~pd.Series(questions).duplicated()).tolist()
    return list(compress(items, mask))


if __name__ == '__main__':
    data = [
        {'question': 'a', 'ans': 'b'},
        {'question': 'b', 'ans': 'd'},
        {'question': 'a', 'ans': 'p'},
        {'question': 'b', 'ans': 'e'},
    ]
    # Single-argument call form prints identically on Python 2 and 3.
    print(distinct4(data))