评论

收藏

[python] gj6 深入python的set和dict

编程语言 编程语言 发布于:2021-06-24 10:23 | 阅读数:632 | 评论:0

6.1 collections中的abc
from collections.abc import Mapping, MutableMapping
#dict属于mapping类型
a = {}
print (isinstance(a, MutableMapping))
# True
6.2 dict的常见用法
a = {"lewen1": {"company": "imooc"},
"lewen2": {"company": "imooc2"}
 }
# clear   Remove all items from D.
# a.clear()
# pass
​
# copy, 返回浅拷贝
new_dict = a.copy()
new_dict["lewen1"]["company"] = "imooc3"  # 浅拷贝,只是拷贝了指向。这里修改会修改了a 中原来的值
print(new_dict)
print(a)
​
​
{'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
{'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
---
import copy
a = {"lewen1": {"company": "imooc"},
"lewen2": {"company": "imooc2"}
 }
new_dict = copy.deepcopy(a)         # 深拷贝,开辟独立的内存空间,并复制值
new_dict["lewen1"]["company"] = "imooc3"  # 这里修改,并不会影响字典a的值
print(new_dict)
print(a)
{'lewen1': {'company': 'imooc3'}, 'lewen2': {'company': 'imooc2'}}
{'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc2'}}
---
 
# fromkeys
new_list = ["lewen1", "lewen2"]
new_dict = dict.fromkeys(new_list, {"company": "imooc"})
print(new_dict)
​
# new_dict["kevin"]  # KeyError 不存在会抛异常
ret = new_dict.get("kevin","None") # 
print(ret)
​
# items 方法
for key,value in new_dict.items():
 print(key,value)
ret_set = new_dict.setdefault("kevin","new mem")  # 不存在key,就设置并返回值
print(ret_set)
print(new_dict)
​
​new_dict.update(    # update() 括号里面添加为可迭代对象
(("lewen", "imooc"),)
 )
{'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc'}}
None
lewen1 {'company': 'imooc'}
lewen2 {'company': 'imooc'}
new mem
{'lewen1': {'company': 'imooc'}, 'lewen2': {'company': 'imooc'}, 'kevin': 'new mem'}
​
  6.3 dict的子类

#不建议继承list和dict
class Mydict(dict):
    """``dict`` subclass whose ``__setitem__`` stores twice the given value.

    Illustrates why subclassing the built-in ``dict`` is discouraged: the
    constructor does not go through the overridden ``__setitem__``, so
    ``Mydict(one=1)`` keeps the value 1, whereas ``my_dict["one"] = 1``
    would store 2.
    """

    def __setitem__(self, key, value):
        # Double the value before delegating to the real dict storage.
        super().__setitem__(key, value * 2)


my_dict = Mydict(one=1)  # value*2 is NOT applied: the constructor bypasses __setitem__
# my_dict["one"] = 1     # value*2 IS applied: item assignment calls __setitem__
print(my_dict)
# Output: {'one': 1}
---
from collections import UserDict
class Mydict(UserDict):
    """``UserDict`` subclass whose ``__setitem__`` stores twice the given value.

    Unlike a direct ``dict`` subclass, ``UserDict`` routes constructor
    arguments through ``__setitem__``, so ``Mydict(one=1)`` stores 2.
    """

    def __setitem__(self, key, value):
        # Double the value before delegating to UserDict's storage.
        super().__setitem__(key, value * 2)


my_dict = Mydict(one=1)
# my_dict["one"] = 1
print(my_dict)
# Output: {'one': 2}
---
# defaultdict 
from collections import defaultdict
my_dict = defaultdict(dict)
my_value = my_dict["bobby"]  # 没有则返回空字典
print(my_value)
{}
6.4 set和frozenset
#set 集合 frozenset (不可变集合) 无序, 不重复
s = set('abcdee')
print(s)
s2 = set(['a','b','c','d','e'])
print(s2)
s3 = {'a','b', 'c'}
print(type(s3))
s = frozenset("abcde") #frozenset 不可变,可以作为dict的key
             # 不能添加值
print(s)
# ---
{'a', 'e', 'c', 'd', 'b'}
  {'a', 'e', 'c', 'd', 'b'}
  <class 'set'>
frozenset({'a', 'e', 'c', 'd', 'b'})
# ---
#向set添加数据
s = set('abcdee')
another_set = set("cef")
s.update(another_set)
print(s)
re_set = s.difference(another_set)  # {'b', 'd', 'a'}
re_set = s - another_set      # {'b', 'd', 'a'}
re_set = s & another_set      # {'c', 'f', 'e'}
re_set = s | another_set        # {'a', 'f', 'c', 'e', 'd', 'b'}
#set性能很高
# | & -  #集合运算
print(re_set)
print (s.issubset(re_set))
if "c" in re_set:
 print ("i am in set")
# ---
{'a', 'f', 'e', 'c', 'd', 'b'}
{'a', 'f', 'c', 'e', 'd', 'b'}
True
i am in set
6.5 dict和set实现原理

DSC0000.gif DSC0001.gif
from random import randint
def load_list_data(total_nums, target_nums,
                   file_name=r"D:\电子书\Python面试宝典Version8.1.pdf"):
    """Read lines from a file into a list and pick random lookup targets.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of random draws; also the upper bound on the
        number of distinct lookup targets returned
    :param file_name: path of the UTF-8 text file to read
    :return: ``(all_data, target_data)`` where ``all_data`` is the list of
        lines read and ``target_data`` is a list of distinct lines drawn
        from it (possibly fewer than ``target_nums`` when draws repeat)
    """
    # NOTE(review): the default path points at a .pdf, which is unlikely to
    # decode as UTF-8 text -- confirm the intended input file.
    all_data = []
    target_data = []
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count >= total_nums:
                break
            all_data.append(line)

    if not all_data:
        # Nothing was read, so there is nothing to sample from.
        return all_data, target_data

    # randint is inclusive on both ends, so the upper bound must be the last
    # valid index of what was actually read (the file may hold fewer than
    # total_nums lines).
    last_index = len(all_data) - 1
    for _ in range(target_nums):
        candidate = all_data[randint(0, last_index)]
        if candidate not in target_data:
            target_data.append(candidate)
    return all_data, target_data
def load_dict_data(total_nums, target_nums,
                   file_name=r"D:\电子书\Python面试宝典Version8.1.pdf"):
    """Read lines from a file into a dict and pick random lookup targets.

    :param total_nums: maximum number of lines to read from the file
    :param target_nums: number of random draws; also the upper bound on the
        number of distinct lookup targets returned
    :param file_name: path of the UTF-8 text file to read
    :return: ``(all_data, target_data)`` where ``all_data`` maps each line
        read to ``0`` and ``target_data`` is a list of distinct keys drawn
        from it (possibly fewer than ``target_nums`` when draws repeat)
    """
    # NOTE(review): the default path points at a .pdf, which is unlikely to
    # decode as UTF-8 text -- confirm the intended input file.
    all_data = {}
    target_data = []
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count >= total_nums:
                break
            all_data[line] = 0

    all_data_list = list(all_data)
    if not all_data_list:
        # Nothing was read, so there is nothing to sample from.
        return all_data, target_data

    # Duplicate lines collapse into one key and the file may be short, so
    # bound the random index by the real number of keys, not by total_nums.
    last_index = len(all_data_list) - 1
    for _ in range(target_nums):
        candidate = all_data_list[randint(0, last_index)]
        if candidate not in target_data:
            target_data.append(candidate)
    return all_data, target_data
def find_test(all_data, target_data, test_times=100):
    """Measure the average time needed to look up every target in a container.

    :param all_data: container searched with the ``in`` operator
        (for example a list or a dict)
    :param target_data: iterable of items to look up in ``all_data``
    :param test_times: number of timed repetitions to average over
    :return: mean duration in seconds of one full pass over ``target_data``
    :raises ValueError: if ``test_times`` is not positive
    """
    import time

    if test_times <= 0:
        raise ValueError("test_times must be a positive integer")

    total_times = 0
    for _ in range(test_times):
        hits = 0
        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring short durations.
        start_time = time.perf_counter()
        for data in target_data:
            if data in all_data:
                hits += 1
        total_times += time.perf_counter() - start_time
    return total_times / test_times


if __name__ == "__main__":
    # Uncomment one of the alternatives below to compare list and dict
    # lookups at different data sizes.
    # all_data, target_data = load_list_data(10000, 1000)
    # all_data, target_data = load_list_data(100000, 1000)
    # all_data, target_data = load_list_data(1000000, 1000)
    # all_data, target_data = load_dict_data(10000, 1000)
    # all_data, target_data = load_dict_data(100000, 1000)
    # all_data, target_data = load_dict_data(1000000, 1000)
    all_data, target_data = load_dict_data(2000000, 1000)
    last_time = find_test(all_data, target_data)
    # dict lookup is far faster than list lookup.
    # Lookup time in a list grows as the list grows.
    # Lookup time in a dict does not grow with the size of the dict.
    print(last_time)
#1.dict的key或者set的值 都必须是可以hash的
#不可变对象 都是可hash的, str, frozenset, tuple,自己实现的类 __hash__
#2. dict的内存花销大(有大量空余的表元),但是查询速度快, 自定义的对象 或者python内部的对象都是用dict包装的
# 3. dict的存储顺序和元素添加顺序有关
# 4. 添加数据有可能改变已有数据的顺序
DSC0002.png   哈希冲突后重新计算位置
  在剩余空间小于三分之一时,申请更大的空间,然后数据搬迁,有可能会改变顺序

DSC0003.png
关注下面的标签,发现更多相似文章