-
Notifications
You must be signed in to change notification settings - Fork 13
/
extract.py
51 lines (41 loc) · 1.93 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import csv
from faker import Faker
import random
import string
from google.cloud import storage
# Number of synthetic employee records to generate.
num_employees = 100

# Faker instance that produces the synthetic field values.
fake = Faker()

# Alphabet the passwords are drawn from.
# NOTE(review): 'm' already occurs in string.ascii_letters, so appending it
# only makes 'm' twice as likely as any other character -- confirm whether a
# different (e.g. special) character was intended here.
password_characters = string.ascii_letters + string.digits + 'm'

# Generate the employee rows and write them to a local CSV file.
# newline='' leaves line-ending handling to the csv module; the explicit
# UTF-8 encoding keeps the output independent of the platform default.
with open('employee_data.csv', mode='w', newline='', encoding='utf-8') as file:
    fieldnames = [
        'first_name',
        'last_name',
        'job_title',
        'department',
        'email',
        'address',
        'phone_number',
        'salary',
        'password',
    ]
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    for _ in range(num_employees):
        writer.writerow({
            "first_name": fake.first_name(),
            "last_name": fake.last_name(),
            "job_title": fake.job(),
            # Faker is not asked for a department here; a second job title
            # stands in as department-like data.
            "department": fake.job(),
            "email": fake.email(),
            # Only a city name is stored in the address column.
            "address": fake.city(),
            "phone_number": fake.phone_number(),
            # fix_len=True forces exactly five digits; without it Faker may
            # return any value with up to five digits (including 0).
            "salary": fake.random_number(digits=5, fix_len=True),
            # Generate an 8-character password from the alphabet above.
            "password": ''.join(random.choices(password_characters, k=8)),
        })
# Upload the CSV file to a GCS bucket
def upload_to_gcs(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: Name of the bucket that receives the file.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Name of the blob the file is uploaded to.

    Prints a confirmation line once the upload call returns.
    """
    # Resolve client -> bucket -> blob in one chain; only the blob handle
    # is needed afterwards.
    target_blob = (
        storage.Client()
        .bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target_blob.upload_from_filename(source_file_name)
    print(f'File {source_file_name} uploaded to {destination_blob_name} in {bucket_name}.')
# Upload settings: the target bucket, plus the CSV name, which is used both as
# the local source path and as the blob name inside the bucket.
bucket_name = 'bkt-employee-data'
source_file_name = destination_blob_name = 'employee_data.csv'

# Push the generated CSV file to GCS.
upload_to_gcs(
    bucket_name=bucket_name,
    source_file_name=source_file_name,
    destination_blob_name=destination_blob_name,
)