forked from sassoftware/pyviyatools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
listfiles.py
executable file
·158 lines (122 loc) · 6.25 KB
/
listfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# listfiles.py January 2018
#
# provides an easy interface to query what files are currently stored in the infrastructure data server.
# You can list all files sorted by modified date or size of file, and query based on date modified,
# user who last modified the file, parentUri or filename. The output provides the size of each file,
# so that you can check the space being used to store files.
# Use this tool to view files managed by the files service and stored in the infrastructure data server.
#
# For example, to see all potential log files
# created by the /jobExecution service that are more than 6 days old:
#
# ./listfiles.py -n log -p /jobExecution -d 6 -o csv
#
# Blog: https://blogs.sas.com/content/sgf/2019/04/04/where-are-my-viya-files/
#
# Change History
#
# 27JAN2019 Comments added
# 12SEP2019 Added the ability to specify a folder as an alternative to a URI
# 15SEP2023 BUG need to quote modified by
#
# Copyright © 2018, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the License); you may not use this file except in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing permissions and limitations under the License.
#
import argparse , datetime, sys
from sharedfunctions import callrestapi,printresult,getfolderid,getidsanduris,createdatefilter
from datetime import datetime as dt, timedelta as td
# Set up the command-line arguments. This block, common to all the tools,
# declares which parameters can be passed to the tool.
# The --output parameter is a common one; here it accepts csv, json, simple
# or simplejson.
parser = argparse.ArgumentParser(
    description="Query and list files stored in the Viya Infrastructure Data Server.")
parser.add_argument("-n", "--name", help="Name contains", default=None)
parser.add_argument("-c", "--type", help="Content Type in.", default=None)
parser.add_argument("-p", "--parent", help="ParentURI starts with.", default=None)
parser.add_argument("-pf", "--parentfolder", help="Parent Folder Name.", default=None)
parser.add_argument("-d", "--days", help="List files older or younger than this number of days", default='-1')
parser.add_argument("-do", "--olderoryounger", help="For the date subsetting specify older or younger",
                    choices=['older', 'younger'], default='older')
parser.add_argument("-m", "--modifiedby", help="Last modified id equals", default=None)
parser.add_argument("-s", "--sortby", help="Sort the output by this field", default='modifiedTimeStamp')
parser.add_argument("-so", "--sortorder", help="Sort order", choices=['ascending', 'descending'],
                    default='descending')
# store_false: the value is True by default and becomes False when -v is given.
parser.add_argument("-v", "--verbosecsv", help="Verbose CSV(only used with -o=csv) ", action='store_false')
parser.add_argument("-o", "--output", help="Output Style", choices=['csv', 'json', 'simple', 'simplejson'],
                    default='json')
parser.add_argument("--debug", action='store_true', help="Debug")

args = parser.parse_args()

# Copy the parsed options into the module-level names used by the rest of the script.
output_style = args.output
days = args.days
modby = args.modifiedby
sortby = args.sortby
nameval = args.name
puri = args.parent
pfolder = args.parentfolder
debug = args.debug
verbosecsv = args.verbosecsv
sortorder = args.sortorder
olderoryounger = args.olderoryounger

# Remains None unless the files query is actually executed later on.
files_result_json = None
# You can subset by parent URI (-p) or by parent folder (-pf), but not both.
if puri is not None and pfolder is not None:
    print("ERROR: cannot use both -p parent and -pf parentfolder at the same time.")
    print("ERROR: Use -pf for folder parents and -p for service parents.")
    # Exit with a non-zero status so calling scripts can detect the usage error.
    sys.exit(1)
# Build the date condition for the query from the --days and --olderoryounger
# options; it is evaluated against creationTimeStamp.
datefilter = createdatefilter(olderoryounger=olderoryounger, datevar='creationTimeStamp', days=days)

# List of filter conditions. The date condition is always present because
# --days always has a value (its default is '-1').
filtercond = [datefilter]

# Optional conditions: name contains / last modified by equals.
if nameval is not None:
    filtercond.append('contains($primary,name,"' + nameval + '")')
if modby is not None:
    filtercond.append("eq(modifiedBy,'" + modby + "')")

# Set the request type and the separator used when joining the conditions.
reqtype = 'get'
delimiter = ','
# Decide which extra condition (if any) restricts the files query, then run it.
# run_files_query is switched off when a folder was requested but it has no
# members, because "in(id,)" with an empty list is not a usable filter.
run_files_query = True

if puri is not None:
    # Process items not in folders: subset on the parent URI.
    filtercond.append("contains(parentUri,'" + puri + "')")
elif pfolder is not None:
    # Process items in folders: list the folder members first.
    folderid = getfolderid(pfolder)[0]
    members_reqval = "/folders/folders/" + folderid + "/members?sortBy=" + sortby + ":" + sortorder + "&limit=10000"
    files_in_folder = callrestapi(members_reqval, reqtype)

    # Now get the file objects using the URIs returned for the members.
    iddict = getidsanduris(files_in_folder)
    uris = iddict['uris']

    # Only the URI of each member is used here; the id is its last path segment.
    idlist = [item.rsplit('/')[-1] for item in uris]

    if idlist:
        inclause = ', '.join("'" + item + "'" for item in idlist)
        filtercond.append("in(id," + inclause + ")")
    else:
        run_files_query = False

if run_files_query:
    # Wrap all conditions in and(...) and query the files service.
    completefilter = 'and(' + delimiter.join(filtercond) + ')'
    reqval = "/files/files?filter=" + completefilter + "&sortBy=" + sortby + ":" + sortorder + "&limit=10000"
    if debug:
        print(reqval)
    files_result_json = callrestapi(reqval, reqtype)
else:
    # Nothing to query: leave the result empty so no rows are reported.
    files_result_json = None
# Choose the columns to output. verbosecsv is a store_false option, so it is
# True by default (short column list) and False when -v is given (full list).
if verbosecsv:
    cols = ['id', 'name', 'contentType', 'documentType', 'createdBy', 'modifiedTimeStamp', 'size', 'parentUri']
else:
    cols = ['id', 'name', 'contentType', 'description', 'typeDefName', 'documentType', 'contentDisposition',
            'fileStatus', 'searchable', 'size', 'creationTimeStamp', 'createdBy', 'modifiedBy',
            'modifiedTimeStamp', 'expirationTimeStamp', 'encoding', 'parentUri']

# Print the result, or a warning when the query produced nothing.
if files_result_json is None:
    print("WARNING: No files returned by query.")
else:
    printresult(files_result_json, output_style, cols)