lcDisjointSetUnion1 - 并查集1

1 并查集

关键理解: 并:是通过一条边将两个没有公共子集的集合合并,查:每个子集的所有子项对应的root是相同的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class DSU{
public:
vector<int> parent;
vector<int> subTreeSize;

DSU(int n) {
parent.resize(n);
subTreeSize.resize(n);
for(int i = 0; i < n; ++i) {
parent[i] = i;
subTreeSize[i] = 1;
}
}

int find(int x) {
while(x != parent[x]) {
parent[x] = parent[parent[x]];
x = parent[x];
}
return x;
}

bool unionMerge(int x, int y) {
int findX = find(x);
int findY = find(y);
if(findX != findY) {
parent[findX] = findY;
subTreeSize[findY] += subTreeSize[findX];
return true;
}
return false;
}

int maxComponentSize() {
return *max(subTreeSize.begin(), subTreeSize.end());
}
};

0952 最大因数联通分量大小

1 题目

https://leetcode-cn.com/problems/largest-component-size-by-common-factor

2 解题思路

个人体悟: 当需要判断两个节点是否位于同一个连通子图时,首选并查集

  • 1 普通思路:
    • 1.1 很简单,暴力的去检测两两节点之间的连接性,然后构建并查集,求解最大的分量值即可;
  • 2 优化思路:
    • 1 中需要o(n^2)的复杂度去计算结点之间的链接性,我们不去计算节点的连接性,改为计算他们质数因子的连接性
    • 2 对于每个数字提取出质因子列表,然后因为这个数的存在,可以将这些质因子联系起来,将所有数字的质因子计作L
    • 3 而后我们统计每个数字,他对应的属于哪个质因子的联通分量,然后对该联通分量的root计数加1即可,最后找出所有的质因子联通分量最大的计数即可。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class Solution {
public:
class DSU{
public:
vector<int> parent;
vector<int> subTreeSize;

DSU(int n) {
parent.resize(n);
subTreeSize.resize(n);
for(int i = 0; i < n; ++i) {
parent[i] = i;
subTreeSize[i] = 1;
}
}

int find(int x) {
while(x != parent[x]) {
parent[x] = parent[parent[x]];
x = parent[x];
}
return x;
}

bool unionMerge(int x, int y) {
int findX = find(x);
int findY = find(y);
if(findX != findY) {
parent[findX] = findY;
subTreeSize[findY] += subTreeSize[findX];
return true;
}
return false;
}

int maxComponentSize() {
return *max(subTreeSize.begin(), subTreeSize.end());
}
};

int getGreatestCommonDivisor(int x, int y) {
int res = 0;
int mod = 0;
do {
mod = x % y;
x = y;
y = mod;

} while(mod != 0);
// cout << y << endl;
return x;
}

int largestComponentSize(vector<int>& nums) {
int n = nums.size();
int maxComponentSize = INT_MIN;

// DSU* dsu = new DSU(n);
// travel all pairs and construct the dsu, o(n ** 2), too slow
// for(int i = 0; i < n; ++i) {
// for(int j = i; j < n; ++j) {
// if(getGreatestCommonDivisor(nums[i], nums[j]) != 1) {
// dsu->unionMerge(i, j);
// }
// }
// }
//
// return *max_element(dsu->subTreeSize.begin(), dsu->subTreeSize.end());

// we shall understand the fact that:
// union: union by edeg, but edge denote two set

// find a number's prime factors:
map<int, vector<int>> numberToPrimes;
for(auto num : nums) {
vector<int> primes;
int x = num;

int d = 2;
while(d * d <= num) {
if(num % d == 0) {
// cull out all d
while(num % d == 0) {
num = num / d;
}
primes.emplace_back(d);
}
++d;
}
if(num > 1 || primes.empty()) {
primes.emplace_back(num);
}
numberToPrimes[x] = primes;
}

// form all the factors and numbers into nodes
unordered_set<int> factors;
for(auto& p : numberToPrimes) {
for(auto& fac : p.second) {
factors.insert(fac);
}
}

unordered_map<int, int> facToNode;
int i = 0;
for(auto fac : factors) {
facToNode[fac] = i++;
}

DSU* dsu = new DSU(factors.size());
// union those numbers by factors
for(auto p : numberToPrimes) {
vector<int> primes = p.second;
// union a number's all factors, we need union action: primes.size() times
for(auto prime : primes) {
// cout << p.first << "->" << prime << endl;
dsu->unionMerge(facToNode[primes[0]], facToNode[prime]);
}

}

// for each number, find the union root of this number
// all numbers who are connected will share the same root
vector<int> cnt(factors.size());
for(auto p : numberToPrimes) {
cnt[dsu->find(facToNode[p.second[0]])]++;
}

return *max_element(cnt.begin(), cnt.end());
// return *max_element(dsu->subTreeSize.begin(), dsu->subTreeSize.end());
// return dsu->maxComponentSize();
}
};

0928 minMalwareSpread 最少病毒传播

1 题目

https://leetcode-cn.com/problems/minimize-malware-spread-ii/

2 解题思路

个人体悟: 当需要判断两个节点是否位于同一个连通子图时,首选并查集

  • 1 普通思路1
    • 1.1 使用dfs,对于每个initial,则计算仅仅由他能传播到的节点有多少
  • 2 使用并查集:
    • 明确目标:对于每个initial,则计算仅仅由他能传播到的节点有多少
    • 那么就首先构造不含initial的一个图G
    • 遍历initial中的每个顶点,那么G中每个root都可以知道会被哪些initial所影响
    • 于是每个initial的贡献为仅仅被自己所连接的root(也就是仅由自己才能传播过去)(换句话说,若G中的一个连通子图,会被好几个initial传播,那么只删除一个initial是没办法改变这个联通子图会被传播的下场)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
class Solution {
public:
int find(vector<int>& parent, int x) {
while(x != parent[x]) {
parent[x] = parent[parent[x]];
x = parent[x];
}
return x;
}

bool unionMerge(vector<int>& parent, vector<int>& subTreeSize, int x, int y) {
int findX = find(parent, x);
int findY = find(parent, y);
if(findX != findY) {
parent[findX] = findY;
subTreeSize[findY] += subTreeSize[findX];
return true;
}
return false;
}


int minMalwareSpread(vector<vector<int>>& graph, vector<int>& initial) {
// 对于每个initial计数的方式发生了改变,考虑一个不含有inital 的图,initial的得分应该是单独被他影响的子集
int n = graph.size();
vector<int> parent(n);
for(int i = 0; i < n; ++i) {
parent[i] = i;
}

vector<bool> isInitial(n, false);
for(auto& i : initial) {
isInitial[i] = true;
}

vector<int> subTreeSize(n, 1);
// 构造union find set
for(int i = 0; i < n; ++i) {
if(isInitial[i]) {
continue;
}
for(int j = 0; j < n; ++j) {
if(1 == graph[i][j] && !isInitial[j]) {
unionMerge(parent, subTreeSize, i, j);
}
}
}

// for(int i = 0; i < n; ++i) {
// cout << "root " << i << " -> size " << subTreeSize[i] << endl;
// }

// find componets infected by a initial
unordered_map<int, unordered_set<int>> initialToComponents;
unordered_map<int, int> rootToInitial;
for(auto& i : initial) {
unordered_set<int> components;
for(int v = 0; v < n; ++v) {
if(!isInitial[v]) {
if(graph[i][v] == 1) {
int root = find(parent, v);
components.insert(root);
// cout << " ||| " << i << " >> " << root << endl;
// no occupied by single initial
if(rootToInitial.find(root) != rootToInitial.end() && \
rootToInitial[root] != i) {
rootToInitial[root] = -1;
continue;
}
rootToInitial[root] = i;
}
}
}
initialToComponents[i] = components;
}


// cal scores for each initial
int resInit = INT_MAX;
int maxScore = INT_MIN;
for(auto& p : initialToComponents) {
int curInit = p.first;
unordered_set<int> components = p.second;
int score = 0;
for(auto& root : components) {
// std::cout << curInit << " -> " << score << " with com: " << root << endl;
if(rootToInitial[root] != -1) {
score += subTreeSize[root];
// cout << "added!" << endl;
}
}
if(score > maxScore || (score == maxScore && curInit < resInit)) {
maxScore = score;
resInit = curInit;
}
}

return resInit;
}
};

0924 minMalwareSpread 最少病毒传播

1 题目

https://leetcode-cn.com/problems/minimize-malware-spread/

2 解题思路

个人体悟: 当需要判断两个节点是否位于同一个连通子图时,首选并查集

  • 1 普通思路1
    • 1.1 考虑每个候选移除顶点,bfs看它能传播多少,如果传播的过程中遇到任何其他候选点,这说明减少该候选点和其位于同一个子图的候选点都是无法减少病毒传播的
    • 1.2 之后我们得到那些候选移除顶点,他们各自的连通子图中就没有其他候选点,我们看连通子图大小,找出子图最大的那个候选点,作为结果输出。
    • 1.3 若没有1.2中的候选点,则根本无法减少传播,于是直接输出所有候选点中的最小值即可。
  • 2 使用并查集或者bfs来上色

    同 方法一 一样,也得找出图中所有的连通分量,不同的是这一步用并查集来做。

    在并查集中会额外计算连通分量的大小,当合并两个连通分量的时候,会把它们的大小进行累加。

    借助并查集,可以用 方法一 中一样的思路处理:对于 initial 中每个颜色唯一的节点,都去计算连通分量的大小,从中找到最优解。如果 initial 中没有颜色唯一的节点,直接返回 min(initial)。

    简洁起见,实现的并查集没有根据 rank 合并,这会让渐进复杂度变大一点。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
class Solution {
public:
int find(vector<int>& parent, int x) {
while(x != parent[x]) {
parent[x] = parent[parent[x]];
x = parent[x];
}
return x;
}

bool unionMerge(vector<int>& parent, vector<int>& subTreeSize, int x, int y) {
int findX = find(parent, x);
int findY = find(parent, y);

if(findX != findY) {
parent[findX] = findY;
subTreeSize[findY] += subTreeSize[findX];
return true;
}
return false;
}

int minMalwareSpread(vector<vector<int>>& graph, vector<int>& initial) {
int n = graph.size();
vector<int> parent(n);
vector<int> subTreeSize(n);

for(int i = 0; i < n; ++i) {
parent[i] = i;
subTreeSize[i] = 1;
}

map<int, int> connectSizeToInitial;
// cal each connected component's size, attach initial with a component's "color"
for(int i = 0; i < n; ++i) {
for(int j = 0; j < n; ++j) {
if(graph[i][j] == 1) {
unionMerge(parent, subTreeSize, i, j);
}
}
}

map<int, int> rootToInit;

// if two initial number in same connected component, then remove one of them
// will cause the same malware spread result
// each color pick a smallest idx as res candidate in initial, finally pick the max connect size
// find max connected size
int maxConnectSize = INT_MIN;
for(auto& i : initial) {
int curRoot = find(parent, i);
// cout << i << " ->> " << curRoot << endl;
if(rootToInit.find(curRoot) != rootToInit.end()) {
rootToInit[curRoot] = -1;
continue;
}
rootToInit[curRoot] = i;
}

bool hasLonelyInitial = false;
for(auto& i : initial) {
// those initial who do not share root
if(rootToInit[find(parent, i)] != -1) {
// cout << "valid!" << endl;
hasLonelyInitial = true;
int curSubSize = subTreeSize[find(parent, i)];
if(connectSizeToInitial.find(curSubSize) != connectSizeToInitial.end()) {
connectSizeToInitial[curSubSize] = min(connectSizeToInitial[curSubSize], i);
continue;
}
connectSizeToInitial[curSubSize] = i;
maxConnectSize = max(maxConnectSize, curSubSize);
// cout << maxConnectSize << endl;
}
}


// for(auto& p : connectSizeToInitial) {
// cout << p.first << " -<<> " << p.second << endl;
// }

// for(auto& p : rootToInit) {
// cout << p.first << " -> " << p.second << endl;
// }

if(hasLonelyInitial) {
return connectSizeToInitial[maxConnectSize];
}
return *min_element(initial.begin(), initial.end());
}
}

0778 泳池游泳上升泳池

1 题目

https://leetcode-cn.com/problems/swim-in-rising-water/

2 解题思路

  • 1 普通思路1
    • 对于每一个水位,采用bfs算法看是否能够到达,那么对于水位可以用二分法加速o(logn),bfs要o(n^2),所以o(n^2logn)
  • 2 普通思路2
    • 仔细理解改题目,问的是a和b什么时候能够连通的问题?那么自然就想到并查集,反复说一句哈:并查集并的子树,查的是子树的root
    • 对于水位从低到高遍历,每次更新和当前水位相同高度的格子(注意,题目说的非常清除,所有的格子的值在0到n^2,且不同)和周围联通情况
    • 每次更新完连通情况我们看一下是否能够使得左上角和右下角的两个节点相互连通即可。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class Solution {
public:
int find(vector<int>& parent, int x) {
while(x != parent[x]) {
parent[x] = parent[parent[x]];
x = parent[x];
}

return x;
}

bool unionMerge(vector<int>& parent, int x, int y) {
int findX = find(parent, x);
int findY = find(parent, y);
if(findX != findY) {
parent[findX] = findY
return true;
}
return false;
}

int swimInWater(vector<vector<int>>& grid) {
// for each threshold, maintain a unionFind
// everytime increase thres, we modify the connection of unionFind
int n = grid.size();

if(n == 1) {
return 0;
}

vector<int> parent(n * n);
for(int i = 0; i < n * n; ++i) {
parent[i] = i;
}

// Each value grid[i][j] is unique.
vector<vector<int>> elevationToIdx(n * n, vector<int>(n));
for(int i = 0; i < n; ++i) {
for(int j = 0; j < n; ++j) {
elevationToIdx[grid[i][j]][0] = i;
elevationToIdx[grid[i][j]][1] = j;
}
}

vector<vector<int>> moves = {{0, 1}, {1, 0}, {0, -1}, {-1, 0}};
for(int thres = 0; thres < n * n; ++thres) {
// when the rain rise, process the exact the same height grid
int tarX = elevationToIdx[thres][0];
int tarY = elevationToIdx[thres][1];
for(auto deltaXY : moves) {
int newX = tarX + deltaXY[0];
int newY = tarY + deltaXY[1];
if(newX >= 0 && newY >= 0 && newX < n && newY < n && grid[newX][newY] <= thres) {
// cout << newX << " " << newY << " ->" << thres <<endl;
unionMerge(parent, grid[newX][newY], grid[tarX][tarY]);
if(find(parent, grid[0][0]) == find(parent, grid[n - 1][n - 1])) {
return thres;
}
}
}
}
return n*n;
}
}