# Goal: leave lots of room for student improvements in class. Also, introduce missingness in the data.
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
import random
|
|
import sys
|
|
|
|
import numpy as np
|
|
|
|
|
|
# Column names written as the header line of the generated CSV file.
FILE_COLUMN_NAMES = ["n", "m", "v1", "v2", "v3", "p1", "p2", "p3"]
|
|
|
|
|
|
def write_data_file(filename, n=20, missing_values=False):
    """Write a CSV file of randomly generated sample data.

    The file starts with a header line built from ``FILE_COLUMN_NAMES``,
    followed by ``n`` data rows.  Each row holds the row index and seven
    values drawn with ``random.gauss``: one with mean 3 and sigma 0.5,
    three with mean 0 and sigma 25, and three with mean 0 and sigma 1.5.
    Every line, header included, ends with a trailing comma.

    Parameters
    ----------
    filename : str
        Path of the file to create; it is opened in ``"w"`` mode, so an
        existing file is overwritten.
    n : int, optional
        Number of data rows to write.
    missing_values : bool, optional
        When true, each row has a 1-in-11 chance of having one of its
        values (never the first one) replaced by NaN.
    """
    with open(filename, "w") as fp:
        fp.write(", ".join(FILE_COLUMN_NAMES) + ",\n")
        for i in range(n):
            # The list literal is evaluated left to right, so the draws
            # happen in the same order as the column layout.
            vals = [
                random.gauss(3., 0.5),
                random.gauss(0., 25),
                random.gauss(0., 25),
                random.gauss(0., 25),
                random.gauss(0., 1.5),
                random.gauss(0., 1.5),
                random.gauss(0., 1.5),
            ]
            # randint(0, 10) is inclusive on both ends: 11 outcomes, one hit.
            if missing_values and random.randint(0, 10) < 1:
                # randrange excludes its upper bound, so idx is always a
                # valid index (randint(1, len(vals)) could return
                # len(vals) and raise IndexError).
                idx = random.randrange(1, len(vals))
                vals[idx] = np.nan
            line = f"{i}, " + ", ".join(f"{v:7.5f}" for v in vals) + ",\n"
            fp.write(line)
|
|
|
|
|
|
if __name__ == '__main__':
    # Missing values are opt-in: pass "-m" on the command line to get them.
    # The parsed flag is forwarded to the writer instead of a hard-coded
    # True, which made the command-line switch a no-op.
    missing_values = '-m' in sys.argv[1:]

    write_data_file("sample_data.csv", n=20, missing_values=missing_values)
|