Skip to content

Commit 9710ac4

Browse files
authored
Merge pull request #5 from dotChris90/issue4
#4 Changed Dataframe little bit more Pandas style
2 parents 9a552fd + 523ff1a commit 9710ac4

File tree

6 files changed

+120
-16
lines changed

6 files changed

+120
-16
lines changed

src/PandasNET/DataFrame.cs

+41-5
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,59 @@
11
using NumSharp;
22
using System;
33
using System.Collections.Generic;
4+
using System.Linq;
45
using System.Text;
6+
using System.Dynamic;
57

68
namespace PandasNET
79
{
810
/// <summary>
911
/// Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns).
1012
/// https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html
1113
/// </summary>
12-
public class DataFrame<T> : NDArray<T>
14+
public class DataFrame<TIndex,TData>
{
    /// <summary>
    /// Backing store that maps a column name to the array holding that column's data.
    /// </summary>
    protected Dictionary<string,NDArray<TData>> _ColumnArrayMapping;

    /// <summary>
    /// Creates a frame with no columns: an empty name-to-array mapping and a
    /// <see cref="Columns"/> index whose value array has its Data set to null.
    /// </summary>
    public DataFrame()
    {
        _ColumnArrayMapping = new Dictionary<string, NDArray<TData>>();
        Columns = new Index<string> { Values = new NDArray<string>() };
        // A null Data array is the marker for "no column registered yet".
        Columns.Values.Data = null;
    }

    /// <summary>Row labels of the frame.</summary>
    public Index<TIndex> Index { get; set; }

    /// <summary>Column labels, in the order the columns were first assigned.</summary>
    public Index<string> Columns { get; set; }

    /// <summary>Value array of the frame; this class itself never assigns it.</summary>
    public NDArray<TData> Values { get; set; }

    /// <summary>
    /// Gets or sets the data array stored under a column name.
    /// </summary>
    /// <param name="column">Name of the column.</param>
    /// <returns>The array registered for <paramref name="column"/>.</returns>
    public NDArray<TData> this[string column]
    {
        get { return _ColumnArrayMapping[column]; }
        set
        {
            bool isNewColumn = !_ColumnArrayMapping.ContainsKey(column);
            if (isNewColumn)
            {
                // Append the new name to the label array and keep Shape in step with it.
                string[] labels = Columns.Values.Data == null
                    ? new string[] { column }
                    : Columns.Values.Data.Concat(new string[] { column }).ToArray();

                Columns.Values.Data = labels;
                Columns.Values.Shape = new Shape(labels.Length);
            }

            // Known names keep their label; only the stored array is replaced.
            _ColumnArrayMapping[column] = value;
        }
    }
}
2359
}

src/PandasNET/Extensions/Pandas.DataFrame.cs

+47-5
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,62 @@
22
using System;
33
using System.Collections.Generic;
44
using System.IO;
5+
using System.Linq;
56
using System.Text;
67

78
namespace PandasNET.Extensions
89
{
910
public static partial class PandasExtensions
1011
{
11-
public static DataFrame<T> DataFrame<T>(this Pandas pd, NDArray<T> data, IList<int> index = null, IList<string> columns = null)
12+
/// <summary>
/// Builds a data frame from one array per column.
/// </summary>
/// <typeparam name="TInd">Element type of the row index.</typeparam>
/// <typeparam name="TValue">Element type of the column data.</typeparam>
/// <param name="pd">The Pandas instance this extension method is invoked on.</param>
/// <param name="data">One array per column; <c>data[i]</c> is stored under <c>columns[i]</c>.</param>
/// <param name="index">
/// Row labels. When null, labels 0..n-1 are generated, where n is the length of the
/// first column (0 when <paramref name="data"/> is empty). Generation is only available
/// for <c>int</c> and <c>double</c> indexes.
/// </param>
/// <param name="columns">Column names. When null, the names "0", "1", ... are generated, one per array.</param>
/// <returns>A frame holding the columns and the row index.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException">More column names than data arrays were supplied.</exception>
/// <exception cref="NotSupportedException">
/// <paramref name="index"/> is null and <typeparamref name="TInd"/> is neither <c>int</c> nor <c>double</c>.
/// </exception>
public static DataFrame<TInd,TValue> DataFrame<TInd,TValue>(this Pandas pd, NDArray<TValue>[] data, IList<TInd> index = null, IList<string> columns = null)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (columns == null)
    {
        // Enumerable.Range takes (start, count), not (start, end): one name per array.
        columns = Enumerable.Range(0, data.Length).Select(x => x.ToString()).ToArray();
    }
    else if (columns.Count > data.Length)
    {
        // Fewer names than arrays is still accepted (the surplus arrays are ignored, as before);
        // more names than arrays would read past the end of data.
        throw new ArgumentException(
            "Got " + columns.Count + " column names but only " + data.Length + " data arrays.",
            "columns");
    }

    if (index == null)
    {
        int rowCount = data.Length > 0 ? data[0].Data.Length : 0;
        index = CreateDefaultIndex<TInd>(rowCount);
    }

    var df = new DataFrame<TInd,TValue>();

    for (int idx = 0; idx < columns.Count; idx++)
    {
        df[columns[idx]] = data[idx];
    }

    df.Index = new Index<TInd>();
    df.Index.Values = new NDArray<TInd>();
    df.Index.Values.Data = index.ToArray();
    df.Index.Values.Shape = new Shape(index.Count);

    return df;
}

/// <summary>
/// Generates the default row labels 0..rowCount-1 for the supported index types.
/// </summary>
private static IList<TInd> CreateDefaultIndex<TInd>(int rowCount)
{
    // The cast through object is checked at run time and succeeds because TInd was just tested.
    if (typeof(TInd) == typeof(int))
    {
        return (IList<TInd>)(object)Enumerable.Range(0, rowCount).ToList();
    }

    if (typeof(TInd) == typeof(double))
    {
        return (IList<TInd>)(object)Enumerable.Range(0, rowCount).Select(x => (double)x).ToList();
    }

    throw new NotSupportedException(
        "A default index can only be generated for Int32 or Double; pass an explicit index for " + typeof(TInd).Name + ".");
}
46+
/// <summary>
/// Builds a data frame from a two-dimensional array by copying each column into its own
/// array and delegating to the overload that takes one array per column.
/// </summary>
/// <param name="pd">The Pandas instance this extension method is invoked on.</param>
/// <param name="data">Source array, read as <c>data[row, column]</c>.</param>
/// <param name="index">Row labels, passed through unchanged.</param>
/// <param name="columns">Column names, passed through unchanged.</param>
/// <returns>The frame produced by the per-column overload.</returns>
public static DataFrame<TInd,TValue> DataFrame<TInd,TValue>(this Pandas pd, NDArray<TValue> data, IList<TInd> index = null, IList<string> columns = null)
{
    int columnCount = data.Shape.Shapes[1];
    int rowCount = data.Shape.Shapes[0];

    var perColumn = new NDArray<TValue>[columnCount];

    for (int col = 0; col < columnCount; col++)
    {
        // Gather one column of the source into a flat buffer.
        var buffer = new TValue[rowCount];
        for (int row = 0; row < rowCount; row++)
        {
            buffer[row] = data[row, col];
        }

        var vector = new NDArray<TValue>();
        vector.Data = buffer;
        perColumn[col] = vector;
    }

    return pd.DataFrame<TInd,TValue>(perColumn, index, columns);
}
2062
}
2163
}

src/PandasNET/Extensions/Pandas.ReadCsv.cs

+4-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ namespace PandasNET.Extensions
99
{
1010
public static partial class PandasExtensions
1111
{
12-
public static DataFrame<double> read_csv(this Pandas pd, string filepath_or_buffer, string sep = ",")
12+
13+
public static DataFrame<int,double> read_csv(this Pandas pd, string filepath_or_buffer, string sep = ",")
1314
{
1415
var data = new List<double[]>();
1516
var columns = new List<string>();
@@ -32,9 +33,10 @@ public static DataFrame<double> read_csv(this Pandas pd, string filepath_or_buff
3233
}
3334

3435
var nd = new NumPy<double>().array(data.ToArray());
35-
var df = pd.DataFrame(nd, columns: columns);
36+
var df = pd.DataFrame<int,double>(nd, columns: columns);
3637

3738
return df;
3839
}
40+
3941
}
4042
}

src/PandasNET/Index.cs

+7-2
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,16 @@
22
using System;
33
using System.Collections.Generic;
44
using System.Text;
5+
using PandasNET;
56

67
namespace PandasNET
78
{
8-
public class Index : NDArray<string>
9+
/// <summary>
/// Holder for the labels of one axis; the labels live in <see cref="Values"/>.
/// </summary>
/// <typeparam name="T">Element type of the labels.</typeparam>
public class Index<T>
{
    // No explicit constructor: the compiler-provided public parameterless one is used,
    // which leaves Values unset until the caller assigns it.

    /// <summary>Array holding the labels.</summary>
    public NDArray<T> Values { get; set; }
}
1217
}

test/PandasNET.UnitTest/Extensions/Pandas.DataFrame.Test.cs

+11-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,17 @@ public void DataFrame()
1717
var np = new NumPy<int>();
1818
var pd = new Pandas();
1919
var array = np.random.randint(low: 0, high: 10, size: new Shape(5, 5));
20-
var df = pd.DataFrame(array, columns: new string[] { "a", "b", "c", "d", "e" });
20+
var df = pd.DataFrame<int,int>(array, columns: new string[] { "a", "b", "c", "d", "e" });
21+
22+
var column1 = df["a"];
23+
24+
for (int idx = 0; idx < 5; idx++)
25+
Assert.IsTrue(column1[idx] == array[idx,0]);
26+
27+
var column2 = df["b"];
28+
29+
for (int idx = 0; idx < 5; idx++)
30+
Assert.IsTrue(column2[idx] == array[idx,1]);
2131
}
2232
}
2333
}

test/PandasNET.UnitTest/Extensions/Pandas.ReadCsv.Test.cs

+10-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,16 @@ public class PandasReadCsvTest
1313
public void read_csv()
{
    // Resolve the data file relative to the current directory.
    string csvPath = System.IO.Path.GetFullPath("../../../../../data/train.csv");

    var pandas = new Pandas();
    var frame = pandas.read_csv(csvPath);

    // Look both columns up first so a missing column fails before any size assertion runs.
    var lag1 = frame["Lag1"];
    var lag2 = frame["Lag2"];

    Assert.IsTrue(lag1.Size == 998);
    Assert.IsTrue(lag2.Size == 998);
}
1827
}
1928
}

0 commit comments

Comments
 (0)