DotNet源码学习-HASHSET(初探)
发布时间:2021-05-21 07:26:43 所属栏目:大数据 来源: https://www.jb51.cc
导读:命名空间:System.Collections.Generic 先看一下官方说明: 类提供了高级的设置操作。 集是不包含重复元素的集合,其元素无特定顺序 。 ? HashSet T对象的容量是对象可以容纳的元素数。当向对象添加元素时,HashSet T对象的容量会自动增加。 HashSetString h
命名空间:System.Collections.Generic 先看一下官方说明:类提供了高级的设置操作。集是不包含重复元素的集合,其元素无特定顺序。? HashSet <T>对象的容量是对象可以容纳的元素数。当向对象添加元素时,HashSet <T>对象的容量会自动增加。 HashSet<String> hashSet = new HashSet<string>(); hashSet.Add("test"); hashSet.Add(); Console.WriteLine(hashSet.Count); 打印结果:1 HashSet的默认构造方法: public HashSet() : this((IEqualityComparer<T>?)null) { } 注:: this语法为调用自身对象的其他构造方法。 public HashSet(IEqualityComparer<T>? comparer) { if (comparer == EqualityComparer<T>.Default) { comparer = ; } _comparer = comparer; _lastIndex = 0; _count = ; _freeList = -1; _version = ; } 第二中创建方式,将集合作为参数。 List<string> list = new List<(); list.Add(); list.Add(); HashSet<string> hSet = (list); Console.WriteLine(hSet.Count); 此时控台输出:1 此时调用的构造方法为: public HashSet(IEnumerable<T> collection,IEqualityComparer<T>? comparer) : this(comparer) { if (collection == ) { throw new ArgumentNullException(nameof(collection)); } var otherAsHashSet = collection as HashSet<T>; if (otherAsHashSet != null && AreEqualityComparersEqual(,otherAsHashSet)) { CopyFrom(otherAsHashSet); } else { // to avoid excess resizes,first set size based on collection's count. Collection may contain duplicates,so call TrimExcess if resulting hashset is larger than threshold ICollection<T>? coll = collection as ICollection<T>; int suggestedCapacity = coll == null ? : coll.Count; Initialize(suggestedCapacity); UnionWith(collection); if (_count > 0 && _slots.Length / _count > ShrinkThreshold) { TrimExcess(); } } } 在该构造方法中若存在重复值则通过查找大于或等于容量的下一个质数来使用建议的容量。 private int Initialize(int capacity) { Debug.Assert(_buckets == null,Initialize was called but _buckets was non-null); int size = HashHelpers.GetPrime(capacity); _buckets = new [size]; _slots = Slot[size]; return size; } 下面为生成质数方法: public static int GetPrime( min) { if (min < ) ArgumentException(SR.Arg_HTCapacityOverflow); foreach (int prime in s_primes) { if (prime >= min) prime; } Outside of our predefined table. Compute the hard way. for (int i = (min | 1); i < int.MaxValue; i += 2if (IsPrime(i) && ((i - 1) % HashPrime != )) i; } min; } 再次扩展-》 bool IsPrime( candidate) { if ((candidate & 1) != ) { int limit = (int)Math.Sqrt(candidate);取平方 int divisor = 3; divisor <= limit; divisor += ) { if ((candidate % divisor) == return false; } true; } return candidate == ; } internal struct Slot { int hashCode; Lower 31 bits of hash code,-1 if unused int next; Index of next entry,-1 if last internal T value; } 存储LIst集合: void UnionWith(IEnumerable<T> other) { if (other == ArgumentNullException(nameof(other)); } foreach (T item other) { AddIfNotPresent(item); } } 继续往下追踪: bool AddIfNotPresent(T value) { if (_buckets == ) { Initialize(); } hashCode; bucket; int collisionCount = ; Slot[] slots = _slots; IEqualityComparer<T>? comparer = _comparer; if (comparer == ) { 取HASHCODE hashCode = value == : InternalGetHashCode(value.GetHashCode()); bucket = hashCode % _buckets!.Length; if (default(T)! != null) TODO-NULLABLE: default(T) == null warning (https://github.com/dotnet/roslyn/issues/34757) { int i = _buckets[bucket] - 1; i >= 0; i = slots[i].next) { if (slots[i].hashCode == hashCode && EqualityComparer<T>.Default.Equals(slots[i].value,value)) { ; } if (collisionCount >= slots.Length) { The chain of entries forms a loop,which means a concurrent update has happened. InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); } collisionCount++; } } { Object type: Shared Generic,EqualityComparer<TValue>.Default won't devirtualize // https://github.com/dotnet/coreclr/issues/17273 So cache in a local rather than get EqualityComparer per loop iteration EqualityComparer<T> defaultComparer = EqualityComparer<T>.Default; if (slots[i].hashCode == hashCode && defaultComparer.Equals(slots[i].value,1)">; } } } { hashCode = value == : InternalGetHashCode(comparer.GetHashCode(value)); bucket = hashCode % _buckets! slots[i].next) { comparer.Equals(slots[i].value,value)) { ; } slots.Length) { InvalidOperationException(SR.InvalidOperation_ConcurrentOperationsNotSupported); } collisionCount++; } } index; if (_freeList >= ) { index = _freeList; _freeList = slots[index].next; } { if (_lastIndex == slots.Length) { IncreaseCapacity(); this will change during resize slots = _slots; bucket = hashCode % _buckets.Length; } index = _lastIndex; _lastIndex++; } slots[index].hashCode = hashCode; slots[index].value = value; slots[index].next = _buckets[bucket] - ; _buckets[bucket] = index + ; _count++; _version++; ; } const int Lower31BitMask = 0x7FFFFFFF; 获取内部的HASHCODE int InternalGetHashCode(T item,1)"> comparer) { if (item == return int hashCode = comparer?.GetHashCode(item) ?? item.GetHashCode(); return hashCode & Lower31BitMask; } 划重点-》 slots[index].hashCode = hashCode; slots[index].value = value; slots[index].next = _buckets[bucket] - 1; 最终列表中的值存储到结构中。 使用对象初始化HASHSET时,如果相同 ? HashSet<string> hashSet = (); hashSet.Add(); hashSet.Add(); HashSet<string>(hashSet); void CopyFrom(HashSet<T> source) { int count = source._count; if (count == As well as short-circuiting on the rest of the work done, this avoids errors from trying to access otherAsHashSet._buckets or otherAsHashSet._slots when they aren't initialized. ; } int capacity = source._buckets!.Length; int threshold = HashHelpers.ExpandPrime(count + ); if (threshold >= capacity) { _buckets = ([])source._buckets.Clone(); _slots = (Slot[])source._slots.Clone(); _lastIndex = source._lastIndex; _freeList = source._freeList; } int lastIndex = source._lastIndex; Slot[] slots = source._slots; Initialize(count); int index = int i = 0; i < lastIndex; ++i) { int hashCode = slots[i].hashCode; if (hashCode >= ) { AddValue(index,hashCode,slots[i].value); ++index; } } Debug.Assert(index == count); _lastIndex = index; } _count = count; } int ExpandPrime(int oldSize)返回要增长到的哈希表大小 { int newSize = 2 * oldSize; Allow the hashtables to grow to maximum possible size (~2G elements) before encountering capacity overflow. Note that this check works even when _items.Length overflowed thanks to the (uint) cast if ((uint)newSize > MaxPrimeArrayLength && MaxPrimeArrayLength > oldSize) { Debug.Assert(MaxPrimeArrayLength == GetPrime(MaxPrimeArrayLength),1)">Invalid MaxPrimeArrayLength MaxPrimeArrayLength; } GetPrime(newSize); } 这里只贴出HashSet声明创建对象的两种方式。 下篇再研究具体实现? (编辑:北几岛) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |