forked from CMSCompOps/WmAgentScripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunifiedConfiguration.json
143 lines (143 loc) · 4.97 KB
/
unifiedConfiguration.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{
"site_for_overflow": {
"value" : ["NONO_T0_CH_CERN","NONO_T2_CH_CERN_HLT"],
"description" : "The sites that we set to overflow and require a specific treatment"
},
"max_cpuh_block" : {
"value" : 4000000,
"description" : "Value of CPUh above which a wf is blocked from assigning"
},
"max_force_complete" : {
"value" : 5,
"description" : "Number of workflow that can be forced complete at a time"
},
"max_per_round" : {
"description" : "limitation on the number of wf to process per module",
"value" : {
"transferor" : 200,
"assignor" : 200,
"closor" : 100,
"checkor" : 200,
"completor" : 200
}
},
"pattern_fraction_pass": {
"value" : {},
"description" : "overide of the completion fraction of dataset with keyword"
},
"tiers_with_no_custodial": {
"value" : ["DQM","DQMIO","RECO","RAWAODSIM"],
"description": "The data tiers that do not go to tape. Can be overidden by custodial overide at campaign level"
},
"use_parent_custodial": {
"value" : false,
"description": "Use the location of the parent dataset for custodial copy"
},
"tiers_with_no_check": {
"value" : ["DQM","DQMIO"],
"description": "The data tiers that do not pass closeout checks. Can be overidden by custodial overide at campaign level"
},
"tiers_no_DDM": {
"value" : ["GEN-SIM","LHE","GEN","DQM","DQMIO","GEN-SIM-DIGI-RAW"],
"description": "The data tiers that do not go to AnaOps"
},
"tiers_to_DDM": {
"value" : ["AODSIM","MINIAODSIM","GEN-SIM-RAW","GEN-SIM-RECO","GEN-SIM-RECODEBUG","AOD","RECO","MINIAOD","ALCARECO","USER","RAW-RECO","RAWAODSIM"],
"description": "The data tiers that go to AnaOps"
},
"tiers_keep_on_disk": {
"value" : ["LHE","RAWAODSIM"],
"description": "the data tier not unlocked until used again"
},
"check_fullcopy_to_announce": {
"value" : false,
"description": "Whether to check for a full copy being present prior to announcing a dataset"
},
"stagor_sends_back": {
"value" : true,
"description": "Whether the stagor module can send workflow back to considered"
},
"max_handled_workflows": {
"value" : 4000,
"description": "The total number of workflows that we allow to handle at a time (transfer, running, assistance)"
},
"max_staging_workflows": {
"value" : 400,
"description": "The total number of workflows that we allow to stage at a time"
},
"max_staging_workflows_per_site": {
"value" : 400,
"description": "The total number of workflows that we allow to stage at a time per site"
},
"max_transfer_in_GB": {
"value" : 800000,
"description": "The total size of the input datasets that can be transfered at a given time"
},
"transfer_timeout": {
"value" : 7,
"description": "Time in days after which to consider a transfer to be stuck"
},
"transfer_lowrate": {
"value" : 0.004,
"description": "Rate in GB/s under which to consider a transfer to be stuck, after transfer_timeout days"
},
"less_copies_than_requested": {
"value" : 1,
"description": "Decrease the number of requested copies by that number, floored to 1"
},
"chopping_threshold_in_GB": {
"value" : 4000,
"description": "The threshold before choping an input dataset in chunk of that size for spreading to sites"
},
"error_codes_to_recover": {
"value" : { "50664" : [ { "legend" : "time-out",
"solution" : "split-2" ,
"details" : null,
"rate" : 20
} ],
"50660" : [ { "legend" : "memory excess",
"solution" : "mem-1000" ,
"details" : null,
"rate" : 20
} ],
"61104" : [ { "legend" : "failed submit",
"solution" : "recover" ,
"details" : null,
"rate" : 20
} ],
"8028" : [ { "legend" : "read error",
"solution" : "recover" ,
"details" : null,
"rate" : 20
} ],
"8021" : [ { "legend" : "cmssw failure",
"solution" : "recover" ,
"details" : "FileReadError",
"rate" : 20
} ],
"8001" : [ { "legend" : "lhe failure",
"solution" : "split-4" ,
"details" : "No lhe event found in ExternalLHEProducer::produce()",
"rate" : 20
} ]
},
"description" : "The error code, threshold and rules for auto-recovery"
},
"error_codes_to_block" : {
"value" :
{
"99109" : [{ "legend" : "stage-out",
"solution" : "recover",
"details" : null,
"rate" : 20
}]
},
"description" : "The error code, threshold and rules to prevent auto-recovery"
},
"error_codes_to_notify" : {
"value" : {
"8021" : { "message" : "Please take a look and come back to Ops." }
},
"description" : "The error code, threshold and rules to notify the user of an error in production"
}
}