Codeforces Round 438 by Sberbank and Barcelona Bootcamp

F. Yet Another Minimization Problem

time limit per test 2 seconds
memory limit per test 256 megabytes
input standard input
output standard output

You are given an array of n integers $a_1$ ... $a_n$. The cost of a subsegment is the number of unordered pairs of distinct indices within the subsegment that contain equal elements. Split the given array into k non-intersecting non-empty subsegments so that the sum of their costs is minimum possible. Each element should be present in exactly one subsegment.

Input

The first line contains two integers n and k ($2$ ≤ n ≤ $10^5$, 2 ≤ k ≤ min (n, 20)) — the length of the array and the number of segments you need to split the array into.

The next line contains n integers $a_1$, $a_2$, …, $a_n$ (1 ≤ $a_i$ ≤ n) — the elements of the array.

Output

Print single integer: the minimum possible total cost of resulting subsegments.

Examples

input

1
2
7 3
1 1 3 3 3 2 1

output

1
1

input

1
2
10 2
1 2 1 2 1 2 1 2 1 2

output

1
8

input

1
2
13 3
1 2 2 2 1 2 1 1 1 2 2 1 1

output

1
9

Note

In the first example it’s optimal to split the sequence into the following three subsegments: [1], [1, 3], [3, 3, 2, 1]. The costs are 0, 0 and 1, thus the answer is 1.

In the second example it’s optimal to split the sequence in two equal halves. The cost for each half is 4.

In the third example it’s optimal to split the sequence in the following way: [1, 2, 2, 2, 1], [2, 1, 1, 1, 2], [2, 1, 1]. The costs are 4, 4, 1.


Solution

我开始还以为我做出来了,然而第6个点就爆了。下面用|表示分割_表示数字

我的最开始的想法是先随意将原数组随意分割为k组______|____|_________|__________

然后对每一个分割进行左右调整,直到不可调为止,______←|→____|_________|__________,但这样局部最优并不能达到全局最优

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
long long costn(long long n){
return (n-1)*n/2;
}
long long cost[100010]={0};
void init(){
int i;
for(i=0;i<=100000;i++)
cost[i]=costn(i);
}

int n,k;
long long ans = 0;

long long v[100010]={0};
long long cnt[100010]={0};
long long cntnow[100010]={0};

int divk[30]={0}; // return index

void clear(){
int i;
for(i=0;i<=n;i++){
cnt[i]=0;
cntnow[i]=0;
}
}

bool doaj(int index){
clear();
int l = index == 0?0:divk[index-1]+1;
int r = index == k-1?n-1:divk[index+1];
// cout<<index<<":"<<l<<","<<r<<endl;
int i;
for(i=l;i<=r;i++){
cnt[v[i]]++;
// cout<<v[i]<<" "<<cnt[v[i]]<<endl;
}

int ansi = l;
long long dermin = 100000000000;
long long dernow = 0;
long long derori = 100000000000;
for(i=l;i<r;i++){
cntnow[v[i]]++;
long long oril = cntnow[v[i]]-1;
long long orir = cnt[v[i]] - oril;
long long oriv = cost[oril]+cost[orir];

long long nowl = oril+1;
long long nowr = orir-1;
long long nowv = cost[nowl]+cost[nowr];

dernow += -oriv+nowv;
if(i == divk[index]){
derori = dernow;
}
if(dernow < dermin || (dernow==dermin && i==divk[index])){
dermin = dernow;
ansi = i;
}
}
if(dermin < derori){
if(ansi != divk[index]){
// cout<<index<<": ["<<l<<","<<r<<"]"<<"der:"<<dermin-derori<<" | "<<divk[index]<<" -> "<<ansi<<endl;
divk[index] = ansi;
ans += dermin-derori;
return true;
}
}
return false;
}

int main(){
init();
cin>>n>>k;
k--;
int i;
for(i=0;i<n;i++)
scanf("%lld",&v[i]);

for(i=0;i<k;i++)
divk[i]=i;

for(i=k;i<n;i++){
cnt[v[i]]++;
}

for(i=1;i<=n;i++){
ans += cost[cnt[i]];
}

bool adjust=true;

while(adjust){
adjust=false;
for(i=0;i<k;i++){
if(doaj(i))
adjust=true;
}
}

cout<<ans<<endl;
return 0;
}

下一个想法是动态规划,dp[p][k]返回把[0 , p-1]分为k组的最小代价,则答案为dp[n][k]

其中再进行划分k组[0 , p-1] -> k-1组[0 , j-1] + 1组[j,p-1]

dp[p][k] = min{dp[j][k-1] + cost[j,p-1]}

有了这个 时间复杂度在接近 $O(n^3)$ , 对一个固定的k遍历p需要n,对一个p遍历j需要n,对一个j计算cost[j,p-1]需要n

分析:

k遍历p 最外层难优化

考虑最里层cost[j,p-1] 因为相邻的j的访问 会使这个函数左右范围变化不大,所以如果能用全局或者静态变量来维护左、右指向以及和,就能对这里的时间复杂度降低到O(1)

1
2
3
4
5
6
7
8
9
10
static long long cost(int l,int r){
static int _l = 0;
static int _r = -1;
static long long _ans = 0;
while(_r < r) _ans += cnt[v[++_r]]++;
while(_l > l) _ans += cnt[v[--_l]]++;
while(_r > r) _ans -= --cnt[v[_r--]];
while(_l < l) _ans -= --cnt[v[_l++]];
return _ans;
}

这样总的时间复杂度还是在O(n^2)

如果可以对一个p 我们的j的范围不再是[1~p-1],而能更小,那么时间复杂度也就还能下降

下面假设对给定p最小的分割为m,也就是dp[p][k]的最小值为dp[m][k-1] + cost[m,p-1]

_____________m-1|m_________p-1|p___________

______n-1|n_____|__________p-1|p_____|_____

______n-1|n_____|_____________|___q-1|q____

对于一个q>p 最小的分割为n,也就是dp[q][k]最小值为dp[n][k-1] + cost[n,q-1]

1
2
3
4
5
dp[n][k-1] + cost[n,q-1]
= dp[n][k-1] + cost[n,p-1] + der[n->q)[p->q)
>= dp[p][k] + der[n->q)[p->q)
= dp[m][k-1]+cost[m,p-1] + der[n->p)[p->q)
>= dp[m][k-1]+cost[m,p-1] + der[m->p)[p->q)

以上,其中der[base][add]表示在base的基础上再增加add部分的 新增加的cost

即是 在m左侧的最终划分都>=m处的划分,因此 如果在p计算出了m,之后再计算q的分割点时遍历只需要从m->q

同理可证当q<p时 更小的值只会在m左侧

因此 有了这个关系 就可以进行二分搜索,时间复杂度也就降到了O(n log n) [好像也不完全是个这个 估计比这个大一点 因为这样二分以后cost函数的均摊就不是O(1)的样子了]

实现如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#define MAX 10000000000

using namespace std;
long long minn(long long v1,long long v2){return v1<v2?v1:v2;}
long long maxx(long long v1,long long v2){return v1>v2?v1:v2;}

long long v[100010]={0};
long long cnt[100010]={0};
long long dp[100010][30]={0};

int n,k;

static long long cost(int l,int r){
static int _l = 0;
static int _r = -1;
static long long _ans = 0;
while(_r < r) _ans += cnt[v[++_r]]++;
while(_l > l) _ans += cnt[v[--_l]]++;
while(_r > r) _ans -= --cnt[v[_r--]];
while(_l < l) _ans -= --cnt[v[_l++]];
// cout<<"["<<_l<<","<<_r<<"]"<<_ans<<endl;
return _ans;
}

void solve(const int & kk,int l,int r,int mathl,int mathr){
// cout<<"---------------:"<<l<<" "<<r<<" "<<mathl<<" "<<mathr<<" "<<endl;
if(l > r)
return ;
int j = (l+r)/2;
int min_pos = mathl;
dp[j][kk] = MAX;
int maxi = minn(j,mathr);
int starti = maxx(kk-1,mathl);
int i;
for(i=starti;i<=maxi;i++){
long long newv = dp[i][kk-1] + cost(i,j-1);
// cout<<l<<" "<<r<<" "<<j<<" "<<i<<" "<<newv<<endl;
if(newv < dp[j][kk]){
dp[j][kk] = newv;
min_pos = i;
}
}
solve(kk, l , j-1, mathl , min_pos);
solve(kk, j+1, r , min_pos, mathr);
}

int main(){
cin>>n>>k;
int i,j,o;
for(i=0;i<n;i++)
scanf("%lld",&v[i]);

dp[0][1] = 0;
for(i=1;i<=n;i++)dp[i][1] = cost(0,i-1);

for(i=2;i<=k;i++)
solve(i,1,n,1,n);
cout<<dp[n][k]<<endl;

return 0;
}

AC

DP Optimizations