时间:2022-09-24 10:21:00 | 栏目:Python代码 | 点击:次
在数学中,对集合的严格定义可能是抽象的且难以掌握。但实际上可以将集合简单地认为是定义明确的不同对象的集合,通常称为元素或成员。
Python 提供了一个内置的集合类型来将对象分组到一个集合中。集合与其他对象类型的区别在于可以对其执行的独特操作。
集合是无序的,并且元素是唯一的,集合本身可以修改,但集合中包含的元素必须是不可变类型。
构建集合的方式
# 构建的set数据会自动进行去重 x = set(<iter>) # list方式 >>> x = set(['foo', 'bar', 'baz', 'foo', 'qux']) >>> x {'qux', 'foo', 'bar', 'baz'} # tuple方式 >>> x = set(('foo', 'bar', 'baz', 'foo', 'qux')) >>> x {'qux', 'foo', 'bar', 'baz'} # 字符串方式 >>> s = 'quux' >>> list(s) ['q', 'u', 'u', 'x'] >>> set(s) {'x', 'u', 'q'}
集合中的元素是无序的(不会自动排序,显示顺序并不固定),并且元素必须是不可变(可哈希)的。
>>> x = {42, 'foo', (1, 2, 3), 3.14159} >>> x {42, 'foo', 3.14159, (1, 2, 3)} # list和dict不能被set >>> a = [1, 2, 3] >>> {a} Traceback (most recent call last): File "<pyshell#70>", line 1, in <module> {a} TypeError: unhashable type: 'list' >>> d = {'a': 1, 'b': 2} >>> {d} Traceback (most recent call last): File "<pyshell#72>", line 1, in <module> {d} TypeError: unhashable type: 'dict'
内置函数 len() 与成员运算符 in 、 not in 的应用。
>>> x = {'foo', 'bar', 'baz'} >>> len(x) 3 >>> 'bar' in x True >>> 'qux' in x False
# x1.union(x2[, x3 ...]) # x1 | x2 [| x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'baz', 'qux', 'quux'} >>> x1 | x2 {'baz', 'quux', 'qux', 'bar', 'foo'} >>> x1.union(x2) {'baz', 'quux', 'qux', 'bar', 'foo'} # 更多的集合并集操作 >>> a = {1, 2, 3, 4} >>> b = {2, 3, 4, 5} >>> c = {3, 4, 5, 6} >>> d = {4, 5, 6, 7} >>> a.union(b, c, d) {1, 2, 3, 4, 5, 6, 7} >>> a | b | c | d {1, 2, 3, 4, 5, 6, 7}
# x1.intersection(x2[, x3 ...]) # x1 & x2 [& x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'baz', 'qux', 'quux'} >>> x1.intersection(x2) {'baz'} >>> x1 & x2 {'baz'} # 更多的集合交集操作 >>> a = {1, 2, 3, 4} >>> b = {2, 3, 4, 5} >>> c = {3, 4, 5, 6} >>> d = {4, 5, 6, 7} >>> a.intersection(b, c, d) {4} >>> a & b & c & d {4}
# x1.difference(x2[, x3 ...]) # x1 - x2 [- x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'baz', 'qux', 'quux'} >>> x1.difference(x2) {'foo', 'bar'} >>> x1 - x2 {'foo', 'bar'} # 更多的集合差异操作 >>> a = {1, 2, 3, 30, 300} >>> b = {10, 20, 30, 40} >>> c = {100, 200, 300, 400} >>> a.difference(b, c) {1, 2, 3} >>> a - b - c {1, 2, 3}
# x1.symmetric_difference(x2) # x1 ^ x2 [^ x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'baz', 'qux', 'quux'} >>> x1.symmetric_difference(x2) {'foo', 'qux', 'quux', 'bar'} >>> x1 ^ x2 {'foo', 'qux', 'quux', 'bar'} # 更多的集合对称差操作 >>> a = {1, 2, 3, 4, 5} >>> b = {10, 2, 3, 4, 50} >>> c = {1, 50, 100} >>> a ^ b ^ c {100, 5, 10}
# x1.isdisjoint(x2) >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'baz', 'qux', 'quux'} >>> x1.isdisjoint(x2) False >>> x2 - {'baz'} {'quux', 'qux'} >>> x1.isdisjoint(x2 - {'baz'}) True # x1.isdisjoint(x2)是True,那么x1 & x2是空集 >>> x1 = {1, 3, 5} >>> x2 = {2, 4, 6} >>> x1.isdisjoint(x2) True >>> x1 & x2 set()
# x1.issubset(x2) # x1 <= x2 >>> x1 = {'foo', 'bar', 'baz'} >>> x1.issubset({'foo', 'bar', 'baz', 'qux', 'quux'}) True >>> x2 = {'baz', 'qux', 'quux'} >>> x1 <= x2 False # 一个集合被认为是它自身的一个子集 >>> x = {1, 2, 3, 4, 5} >>> x.issubset(x) True >>> x <= x True
# x1 < x2 >>> x1 = {'foo', 'bar'} >>> x2 = {'foo', 'bar', 'baz'} >>> x1 < x2 True >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'bar', 'baz'} >>> x1 < x2 False # 子集与真子集的判断 >>> x = {1, 2, 3, 4, 5} >>> x <= x True >>> x < x False
# x1.issuperset(x2) # x1 >= x2 >>> x1 = {'foo', 'bar', 'baz'} >>> x1.issuperset({'foo', 'bar'}) True >>> x2 = {'baz', 'qux', 'quux'} >>> x1 >= x2 False # 一个集合被认为是它自身的一个超集 >>> x = {1, 2, 3, 4, 5} >>> x.issuperset(x) True >>> x >= x True
# x1 > x2 >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'bar'} >>> x1 > x2 True >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'bar', 'baz'} >>> x1 > x2 False # 集合不是其自身的真超集 >>> x = {1, 2, 3, 4, 5} >>> x > x False
尽管集合中包含的元素必须是不可变类型,但集合本身可以修改。
# x1.update(x2[, x3 ...]) # x1 |= x2 [| x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'baz', 'qux'} >>> x1 |= x2 >>> x1 {'qux', 'foo', 'bar', 'baz'} >>> x1.update(['corge', 'garply']) >>> x1 {'qux', 'corge', 'garply', 'foo', 'bar', 'baz'}
# x1.intersection_update(x2[, x3 ...]) # x1 &= x2 [& x3 ...] >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'baz', 'qux'} >>> x1 &= x2 >>> x1 {'foo', 'baz'} >>> x1.intersection_update(['baz', 'qux']) >>> x1 {'baz'}
>>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'baz', 'qux'} >>> x1 -= x2 >>> x1 {'bar'} >>> x1.difference_update(['foo', 'bar', 'qux']) >>> x1 set()
# x1.symmetric_difference_update(x2) # x1 ^= x2 >>> x1 = {'foo', 'bar', 'baz'} >>> x2 = {'foo', 'baz', 'qux'} >>> x1 ^= x2 >>> x1 {'bar', 'qux'} >>> >>> x1.symmetric_difference_update(['qux', 'corge']) >>> x1 {'bar', 'corge'}
>>> x = {'foo', 'bar', 'baz'} >>> x.add('qux') >>> x {'bar', 'baz', 'foo', 'qux'}
>>> x = {'foo', 'bar', 'baz'} >>> x.remove('baz') >>> x {'bar', 'foo'} # 如果元素不存在则引发异常 >>> x.remove('qux') Traceback (most recent call last): File "<pyshell#58>", line 1, in <module> x.remove('qux') KeyError: 'qux'
>>> x = {'foo', 'bar', 'baz'} >>> x.discard('baz') >>> x {'bar', 'foo'} >>> x.discard('qux') >>> x {'bar', 'foo'}
>>> x = {'foo', 'bar', 'baz'} >>> x.pop() 'bar' >>> x {'baz', 'foo'} >>> x.pop() 'baz' >>> x {'foo'} >>> x.pop() 'foo' >>> x set() >>> x.pop() Traceback (most recent call last): File "<pyshell#82>", line 1, in <module> x.pop() KeyError: 'pop from an empty set'
>>> x = {'foo', 'bar', 'baz'} >>> x {'foo', 'bar', 'baz'} >>> >>> x.clear() >>> x set()
frozenset 为 Python 的内置类型,与 set 类似但不可变:创建后不能修改,但仍可执行不修改集合的操作(如 len()、交集、并集等)。
>>> x = frozenset(['foo', 'bar', 'baz']) >>> x frozenset({'foo', 'baz', 'bar'}) >>> len(x) 3 >>> x & {'baz', 'qux', 'quux'} frozenset({'baz'})
尝试调用会修改 frozenset 的方法会失败
>>> x = frozenset(['foo', 'bar', 'baz']) >>> x.add('qux') Traceback (most recent call last): File "<pyshell#127>", line 1, in <module> x.add('qux') AttributeError: 'frozenset' object has no attribute 'add' >>> x.pop() Traceback (most recent call last): File "<pyshell#129>", line 1, in <module> x.pop() AttributeError: 'frozenset' object has no attribute 'pop' >>> x.clear() Traceback (most recent call last): File "<pyshell#131>", line 1, in <module> x.clear() AttributeError: 'frozenset' object has no attribute 'clear' >>> x frozenset({'foo', 'bar', 'baz'})