-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranspose.py
164 lines (134 loc) · 5.64 KB
/
transpose.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from typing import Iterator, List, NamedTuple, Protocol, Set
import numpy as np
import pandas as pd
from cjwmodule import i18n
from cjwmodule.util.colnames import gen_unique_clean_colnames_and_warn
class Settings(Protocol):
    """Structural type for the limits object handed to this module."""

    # Upper bound on output columns; `render` uses it to cap input rows.
    MAX_COLUMNS_PER_TABLE: int

    # Passed through (via `settings`) to the column-name generator;
    # presumably the per-name byte limit it enforces -- confirm in cjwmodule.
    MAX_BYTES_PER_COLUMN_NAME: int
class GenColnamesResult(NamedTuple):
    """Outcome of generating column names for the transposed table."""

    # All column names for the output table (even the first column).
    names: List[str]

    # All the things we should tell the user about how we tweaked names.
    warnings: List[str]
def _gen_colnames_and_warn(
    first_colname: str, first_column: pd.Series, settings: Settings
) -> GenColnamesResult:
    """
    Build the column names of the transposed table.

    The first output name is `first_colname`, falling back to
    `first_column.name` when `first_colname` is empty. The remaining names are
    the values of `first_column`, in order.

    The candidate names are handed to `gen_unique_clean_colnames_and_warn`,
    whose names and warnings are returned unchanged as a `GenColnamesResult`.

    Assume `first_column` is text without nulls.
    """
    leading_name = first_colname or first_column.name
    candidates = [leading_name, *first_column.values]
    return GenColnamesResult(
        *gen_unique_clean_colnames_and_warn(candidates, settings=settings)
    )
def _headers_converted_to_text_warning(column):
    """Build the warning (with quick fix) for a non-text header column."""
    return {
        "message": i18n.trans(
            "warnings.headersConvertedToText.message",
            'Headers in column "{column_name}" were auto-converted to text.',
            {"column_name": column},
        ),
        "quickFixes": [
            {
                "text": i18n.trans(
                    "warnings.headersConvertedToText.quickFix.text",
                    "Convert {column_name} to text",
                    {"column_name": '"%s"' % column},
                ),
                "action": "prependModule",
                "args": [
                    "converttotext",
                    {"colnames": [column]},
                ],
            }
        ],
    }


def _different_column_types_warning(to_convert):
    """Build the warning (with quick fix) for columns auto-converted to text."""
    return {
        "message": i18n.trans(
            "warnings.differentColumnTypes.message",
            '{n_columns, plural, other {# columns (see "{first_colname}") were} one {Column "{first_colname}" was}} '
            "auto-converted to Text because all columns must have the same type.",
            {"n_columns": len(to_convert), "first_colname": to_convert[0]},
        ),
        "quickFixes": [
            {
                "text": i18n.trans(
                    "warnings.differentColumnTypes.quickFix.text",
                    "Convert {n_columns, plural, other {# columns} one {# column}} to text",
                    {"n_columns": len(to_convert)},
                ),
                "action": "prependModule",
                "args": [
                    "converttotext",
                    {"colnames": to_convert},
                ],
            }
        ],
    }


def render(table, params, *, input_columns, settings: Settings):
    """
    Transpose `table`, using its first column's values as the new column names.

    Parameters:
        table: input DataFrame. It is not modified; all changes are made on a
            derived frame.
        params: dict; `params["firstcolname"]` names the first output column
            (empty means "use the first input column's name").
        input_columns: mapping from column name to an object exposing `.name`
            and `.type` (compared against "text").
        settings: limits; at most `MAX_COLUMNS_PER_TABLE` input rows are kept.

    Returns:
        The transposed DataFrame, or a `(DataFrame, warnings)` tuple when any
        warning was produced. An input without columns yields an empty
        DataFrame.
    """
    if not len(table.columns):
        # happens if we're the first module in the module stack
        return pd.DataFrame()

    warnings = []

    max_rows = settings.MAX_COLUMNS_PER_TABLE
    if len(table) > max_rows:
        # Positional slice: keep the first `max_rows` rows regardless of what
        # the index labels are.
        table = table.iloc[:max_rows]
        warnings.append(
            i18n.trans(
                "warnings.tooManyRows",
                "We truncated the input to {max_columns} rows so the "
                "transposed table would have a reasonable number of columns.",
                {"max_columns": max_rows},
            )
        )

    column = table.columns[0]
    first_column = table[column]
    # Non-inplace drop: gives us our own frame, so the caller's table is
    # never mutated by the edits below.
    table = table.drop(columns=column)

    if input_columns[column].type != "text":
        warnings.append(_headers_converted_to_text_warning(column))

    # Ensure headers are string. (They will become column names.)
    # * categorical => str
    # * nan => ""
    # * non-text => str
    # Empty values are all equivalent, so every null becomes "".
    first_column = first_column.astype(str).mask(first_column.isna(), "")

    gen_headers_result = _gen_colnames_and_warn(
        params["firstcolname"], first_column, settings
    )
    warnings.extend(gen_headers_result.warnings)

    input_types = {c.type for c in input_columns.values() if c.name != column}
    if len(input_types) > 1:
        # Convert everything to text before converting. (All values must have
        # the same type.)
        to_convert = [c for c in table.columns if input_columns[c].type != "text"]
        if to_convert:
            warnings.append(_different_column_types_warning(to_convert))

        for colname in to_convert:
            # TODO respect column formats ... and nix the quick-fix?
            series = table[colname]
            # Stringify, then restore nulls in a single assignment. (Chained
            # `table[colname][mask] = ...` does not write through under
            # pandas Copy-on-Write, which would leave "nan" strings behind.)
            table[colname] = series.astype(str).mask(series.isnull(), np.nan)

    # The actual transpose
    table.index = gen_headers_result.names[1:]
    ret = table.T
    # Set the name of the index: it will become the name of the first column.
    ret.index.name = gen_headers_result.names[0]
    # Make the index (former colnames) a column
    ret.reset_index(inplace=True)

    if warnings:
        return (ret, warnings)
    else:
        return ret
def _migrate_params_v0_to_v1(params):
return {"firstcolname": ""}
def migrate_params(params):
    """
    Bring stored params up to the current version.

    Params that already contain "firstcolname" are returned as-is; anything
    else is upgraded with `_migrate_params_v0_to_v1`.
    """
    if "firstcolname" in params:
        return params
    return _migrate_params_v0_to_v1(params)