https://repo.anaconda.com/archive/ ---- anaconda archive
python -m pip install --upgrade pip
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # cpu only
os.environ["CUDA_VISIBLE_DEVICES"]= "2,3"
# -*- coding: utf-8 -*-
# coding: utf-8
pip install scipy
pip install scikit-learn
pip install scikit-image -----> skimage
pip install Pillow ----> PIL
pip install opencv-python --- cv2
pip install matplotlib
> pip cache dir ----> check the cache directory ----> pip cache purge
> pip cache purge ---> to clear all files from pip's cache. ----> C:\Users\BRAIN\AppData\Local\pip\cache does not get cleared, though...
> conda clean --all
### conda activate
> source ~/anaconda3/etc/profile.d/conda.sh
conda --version
conda config --describe
conda config --set auto_activate_base true # conda 4.6 or later
=======
C:\Users\Administrator\AppData\Local\pip ----> pip cache directory > cd %USERPROFILE%\AppData\Local\pip ---> pip cache purge does not clear this one~~
========
Installing python 3.6
https://repo.continuum.io/archive/index.html
Anaconda3-5.2.0-Windows-x86_64.exe ---> python 3.6
python version upgrade: > conda install python==3.6.8 (<--- upgrades from 3.6.4)
=============================
Manual PyDev install:
- Copy the features and plugins files into place.
- Installing java works, but you can also create a jre directory under eclipse and copy it in there.
- After launching eclipse: Windows-Preferences-PyDev-Interpreters-Python Interpreter - Quick Auto-Config
===========================================================================
conda info ---> check the cache directories
conda clean --all
===========================================================================
> conda create -n xxx python=3.6 ----> the python version must be specified; if it is missing, no python is installed by default.
> conda env list --> list the installed envs
> conda remove --name xxx --all ---> remove an env
===========================================================================
Creating a virtual environment with venv
1. Create the directory that will hold the virtual environment and cd into it.
2. python -m venv <env-name> --> a directory with that name is created under the current directory.
3. Move into the directory that was created.
4. xxxx> Scripts\activate
5. To exit: deactivate or conda deactivate. (Example session below.)
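For example, a minimal Windows cmd session (the directory and env name "tfcpu" are just illustrations):
> mkdir D:\venvs
> cd D:\venvs
> python -m venv tfcpu
> cd tfcpu
> Scripts\activate
(tfcpu)> python -m pip install --upgrade pip
(tfcpu)> deactivate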
=====================================================================
eclipse: Windows-Preferences-PyDev-Interpreters-Python Interpreter ---> multiple interpreters can be registered
e.g.:
python C:\Anaconda\python.exe
tfcpu C:\Anaconda\hccho-virtual\tfcpu\Scripts\python.exe
Either of the two registered interpreters can then be selected per project: project properties - PyDev-Interpreter/Grammar - Interpreter.
=====================================================================
https://github.com/higgsfield/RL-Adventure/blob/master/6.categorical%20dqn.ipynb ---> open this file in colab or in nbviewer:
https://nbviewer.jupyter.org/github//higgsfield/RL-Adventure/blob/master/6.categorical%20dqn.ipynb
https://colab.research.google.com/github//higgsfield/RL-Adventure/blob/master/6.categorical%20dqn.ipynb
=====================================================================
> pip download
=====================================================================
import skimage.io as io --- error --- a plain pip install does not fix it; instead:
pip install --upgrade scikit-image
=====================================================================
Installing a python package that hits a permissions error
>pip install box2d-py -----> Consider using the `--user` option or check the permissions.
>python -m pip install --user box2d-py
=====================================================================
Adding an already-existing project to Eclipse: New -> Project -> General -> Project --> pick the folder & enter a name
=============================
import sys
sys.path.append("../") <--- add inside the code
sys.path.append("D:\\hccho\\im2txt\\im2txt") # append adds at the very end
sys.path.insert(0, 'path containing the .py file') # 0 --> insert at position 0.
sys.path.insert(0,'D:\\hccho\\ML\\PythonCode\\StackGAN-master') <--- add inside the code
Alternatively, register the path in the eclipse project properties under Resource-Linked Resources.
=====================================================================
The approach is simple.
- cuda 9.0 + cudnn 7.1 + tensorflow 1.8
- cuda 8.1 + cudnn 6.0 + tensorflow 1.3
(if you go to download cuda 8.0, only 8.1 is offered, not 8.0)
First,
- install cuda 9.0 + cudnn 7.1 -> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0
- install cuda 8.1 + cudnn 6.0 -> C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0
After installing, register both directories in the PATH environment variable.
Install tensorflow-gpu 1.8 in base, and tensorflow-gpu 1.3 in a virtual environment (I use venv).
With this setup, both work fine.
===================================================================
Fix for the spyder kernel dying
os.environ['KMP_DUPLICATE_LIB_OK']='True'
=====================================================================
num_batch = int(max_train/batch_size) # iterations needed for 1 epoch
for i in range(num_epoch):
    for j in range(num_batch):
        .
        .
        .
        if j % show_every == 0:
            print('(Iteration %d / %d, Epoch %d / %d) loss: %f' % (i*num_batch+j, num_epoch*num_batch, i, num_epoch, loss_xxx))
=====================================================================
np.set_printoptions(threshold=sys.maxsize) <--- print all the data
np.set_printoptions(precision=4) <--- number of significant digits
np.set_printoptions(suppress=True) <----- no scientific (e) notation
np.set_printoptions(suppress=True, formatter={'float_kind':'{:5.3f}'.format}, linewidth=130)
====================================================================
import ctypes # An included library with Python install.
def Mbox(title, text, style):
    ctypes.windll.user32.MessageBoxW(0, text, title, style)
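A quick usage sketch (Windows only; 0 is the plain OK-button style flag):
Mbox('Title', 'Hello world', 0) # blocks until the user clicks OK; MessageBoxW returns 1 for OK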
====================================================================
np.shape ==> (n1-axis=0,n2-axis=1,n3-axis=2,...)
np.sum(x, axis=n) ==> axis n disappears
===================================================================
np.squeeze(x) <== Remove single-dimensional entries from the shape of an array
np.expand_dims <== Insert a new axis, corresponding to a given position in the array shape
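A small shape demo of the calls above (the zeros are arbitrary data):
import numpy as np
x = np.zeros((3, 1, 2))
print(np.squeeze(x).shape)        # (3, 2) -- the size-1 axis is removed
print(np.expand_dims(x, 0).shape) # (1, 3, 1, 2) -- a new axis at position 0
print(np.sum(x, axis=1).shape)    # (3, 2) -- axis 1 disappears after the sum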
===================================================================
a=[1,1,2,2]
a= list(map((lambda x: x*x),a))
===================================================================
a = np.array([1,2,3,4,5,4,3,2,10])
b = a[a<5] # boolean mask; works because a is a numpy array
# list comprehension
a = [1,2,3,4,5,4,3,2,10]
b = [ x*10 for x in a if x< 5]
y = [-1 if a==0 else 1 for a in y]
z = [-1 for a in y if a==1]
# nested list comprehension
matrix = [[1,2,3],[4,5,6]]
a = [x for row in matrix for x in row]
================================================
x = np.arange(-2, 3)
x.ravel()[np.flatnonzero(x)] # Use the indices of the non-zero elements as an index array to extract these elements:
===================================================================
# string ==> float conversion
a = [['1.2','2.33'],['1.2','2.33']]
b = np.array(a,dtype=np.float32)
# numpy array ==> list
c = b.tolist()
===================================================================
A[[0,1,2],[0,0,1]] = [1,2,7] # numpy fancy indexing: assigns 1,2,7 to A[0,0], A[1,0], A[2,1] respectively
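A self-contained sketch of that fancy-indexing assignment (A here is just an illustrative 3x2 array):
import numpy as np
A = np.zeros((3, 2), dtype=int)
A[[0, 1, 2], [0, 0, 1]] = [1, 2, 7]   # row indices pair element-wise with column indices
print(A)   # [[1 0]
           #  [2 0]
           #  [0 7]]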
===================================================================
# singular values of a ~ sqrt(eigenvalues) of (a.T)a or a(a.T)
# once the data matrix a is centered to mean 0, SVD is a generalization of PCA.
# SVD avoids the numerical loss incurred when computing a.T.dot(a), so it is more stable.
# the columns of U, the first return value of np.linalg.svd, are the corresponding eigenvectors.
import contextlib
@contextlib.contextmanager
def printoptions(*args, **kwargs):
    original = np.get_printoptions()
    np.set_printoptions(*args, **kwargs)
    try:
        yield
    finally:
        np.set_printoptions(**original)
import numpy as np
np.set_printoptions(threshold=sys.maxsize)
#a = np.random.randint(10, size=(9, 6))
a =np.array([[0, 9, 7, 9, 1, 6, 7],
[2, 3, 7, 8, 9, 6, 15],
[2, 3, 4, 5, 9, 4, 13],
[5, 8, 0, 3, 9, 5, 14],
[7, 9, 9, 8, 4, 1, 5],
[2, 7, 8, 2, 0, 0, 0],
[8, 7, 0, 4, 8, 3, 12],
[9, 2, 3, 8, 3, 0, 3],
[6, 6, 2, 0, 9, 0, 9]])
#a =np.array([[5,-7,7],[4,-3,4],[4,-1,2]])
aa = a.T.dot(a) # covariance matrix of a
w,z = np.linalg.eig(aa) # the columns of z are the eigenvectors
bb = a.dot(a.T)
ww,zz = np.linalg.eig(bb) # the columns of zz are the eigenvectors
U, s, V = np.linalg.svd(a, full_matrices=True) # the rows of V are the eigenvectors of aa; the signs of the vectors are scrambled.
# the rows of V are the eigenvectors of aa, i.e. z.T ~ V, but the row signs are scrambled, so compare carefully. valid only up to the rank.
# the columns of U are the eigenvectors of bb. zz.T ~ U.T. valid up to the rank.
UU, ss, VV = np.linalg.svd(a, full_matrices=False) # "Numerical Recipes" implements the full_matrices=False variant
print(U.shape,s.shape,V.shape)
print(UU.shape,ss.shape,VV.shape)
with printoptions(precision=3, suppress=True):
    # the spectral decomposition follows directly from the definition of eigenvalues
    print('eigen decomposition = spectral decomposition', z.dot(np.diag(w)).dot(z.T) - aa)
    # compare eigenvalues (note: eig does not sort its eigenvalues, so the ordering may differ from s)
    print('eigenvalue comparison', np.sqrt(w) - s)
    print('U.dot(U.T)', U.dot(U.T))
    print('UU.dot(UU.T)', UU.dot(UU.T)) # not the identity
    print('UU.T.dot(UU)', UU.T.dot(UU))
    print('V.dot(V.T)', V.dot(V.T))
    print('VV.dot(VV.T)', VV.dot(VV.T))
    # the full matrices need padding to reconstruct a
    print(U.dot(np.pad(np.diag(s),[(0,2),(0,0)],'constant')).dot(V))
    # in the reduced form a plain product is enough
    print(UU.dot(np.diag(ss)).dot(VV))
    r = np.min(a.shape)
    print('thin SVD')
    print(U[:,:r].dot(np.diag(s[:r])).dot(V[:r,:]))
    print(UU[:,:r].dot(np.diag(ss[:r])).dot(VV[:r,:]))
    # using the rank ...
    r = np.linalg.matrix_rank(a)
    print('compact SVD')
    print(U[:,:r].dot(np.diag(s[:r])).dot(V[:r,:]))
    print(UU[:,:r].dot(np.diag(ss[:r])).dot(VV[:r,:]))
    r = 5 # choosing r smaller than the rank gives dimension reduction.
    print('truncated SVD')
    print(U[:,:r].dot(np.diag(s[:r])).dot(V[:r,:]))
    print(UU[:,:r].dot(np.diag(ss[:r])).dot(VV[:r,:]))
    # row reduction
    r = 4
    print('row reduction')
    print(U[:,:r].T.dot(a))
    # column reduction
    r = 3
    print('col reduction')
    print(a.dot(V[:r,:].T))
=====================================================================================
def f(x):
return x+10
# make another list from a list
A = [1,2,3,4]
A2=[[1,2],[3,4],[5,6]]
#B = [x*10 for x in A]
#B = [f(x) for x in A]
#B = list(map(lambda x: x*10,A))
B = list(map(f,A))
df = pd.DataFrame(A2, columns=['age','age2'])
df['age']= df['age'].apply(f)
=====================================================================================
# A DataFrame can be built from a dict, or from a list with the column names specified.
df = pd.DataFrame({'Name':['C','K','P'],'age':[34,53,25]})
A=[[2,3,4],[3,3,5],[5,63,4],[3,4,5]]
A1=['a1','a2','a3']
A2=['x1','x2','x3','x4']
df2=pd.DataFrame(data=A,index=A2,columns=A1)
df3 = df2[['a3','a2']] # take only some of the columns
df3.columns = ['XX1','XX2']
======================
A = np.array([[13,24,3],[10,20,10],[100,1,1]])
print(A)
df2 = pd.DataFrame(A[1:,1:],columns=A[0,1:],index =A[1:,0])
print(df2)
=====================
Print all the data
pandas.set_option('display.max_columns', None,'display.max_rows', None)
#Pivot
mydatapd = pd.DataFrame(mydata,columns=['A','B','C'])
mydatapd = mydatapd.pivot(index ='A',columns='B',values='C')
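A self-contained pivot sketch (mydata here is a made-up long-format table):
import pandas as pd
mydata = [['r1','c1',1], ['r1','c2',2], ['r2','c1',3], ['r2','c2',4]]
mydatapd = pd.DataFrame(mydata, columns=['A','B','C'])
print(mydatapd.pivot(index='A', columns='B', values='C'))
# B   c1  c2
# A
# r1   1   2
# r2   3   4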
=====================================================================================
import pickle # from six.moves import cPickle as pickle 도 가능
var1 = xxx
var2 = xxx
var3 = xx
with open('test.pickle','wb') as f:
    pickle.dump(var1,f)
    pickle.dump(var2,f)
    pickle.dump(var3,f)
Objects are loaded back in the same order they were dumped.
with open('test.pickle','rb') as f:
    data1=pickle.load(f)
    data2=pickle.load(f)
    data3=pickle.load(f)
=====================================================================================
with open('class_info.pickle','rb') as f:
    data1=pickle.load(f,encoding='latin1')
=====================================================================================
Variables can be bundled into a dict, then written and read back in one shot.
The file must be opened in binary ('b') mode.
with open('data.pickle', 'wb') as f:
    pickle.dump(data, f)
with open('data.pickle','rb') as f:
    data1=pickle.load(f)
=====================================================================================
# Some pickles have to be opened like this.
from sklearn.externals import joblib # removed in newer scikit-learn; use "import joblib" there instead
clf = joblib.load('meta.pkl')
=====================================================================================
# save to file. (1d, 2d array only)
np.savetxt("foo.txt", X1, delimiter=",")
====
a = [['a','b'], ['cc','dd']]
np.savetxt(f, a, delimiter="\t", fmt="%s")
=====
# append mode
a = [['a','b'], ['cc','dd']]
with open("aa.txt", "a") as f:
    np.savetxt(f, [datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")], delimiter="\t", fmt="%s")
    np.savetxt(f, a, delimiter="\t", fmt="%s")
=====================================================================================
# simple text file
values = ['1', '2', '3']
with open("file.txt", "w") as output:
    for i in values:
        output.write(str(i)+'\n')
=====================================================================================
np.random.standard_normal(size=(2,3))
d = np.array(100 * np.random.standard_normal(size=(2,3)),dtype=np.int16)/100
=====================================================================================
a = "123456789"
print("a[:]: ", a[:]) #123456789
print("a[::]: ", a[::]) #123456789
print("a[::3]: ", a[::3]) #147
print("a[::-1]: ", a[::-1]) #987654321
================================================
# for loop in reverse order
for t in np.arange(5)[::-1]:
    print(t)
# reversed
for t in reversed(range(5)):
    print(t)
================================================
mat = [[1, 2, 3], [4, 5, 6]]
A = list(zip(*mat)) # [(1, 4), (2, 5), (3, 6)]
B= list(np.array(A))
print(B) # [array([1, 4]), array([2, 5]), array([3, 6])]
C = np.array(A).tolist()
print(C) # [[1, 4], [2, 5], [3, 6]]
=========================================================
a = ["Code", "mentor", "Python", "Developer"]
print (" ".join(a)) # Code mentor Python Developer
=====================================================================================
datetime.date(2017,7,17)+datetime.timedelta(days=4)
# date/time to string
datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S") # ---> '20191112-10-44-55'
=====================================================================================
from IPython.display import display, Math, Latex
display(Math(r'F(k) = \int_{-\infty}^{\infty} f(x) e^{2\pi i k} dx'))
=====================================================================================
Elapsed time
import time
s=time.time()
...
e=time.time()
print(e-s,"sec")
=====================================================================================
A = np.array([[1,2,0,0,0],[2,3,5,0,0],[0,1,3,2,0],[0,0,1,2,3],[0,0,0,2,1]])
B = np.array([2,3,1,0,1]).T
X = np.linalg.solve(A,B.T)
print(X, A.dot(X))
=====================================================================================
# binary representation (bit unpacking)
import numpy as np
A = np.array([range(5)],dtype=np.uint8).T
B = np.unpackbits(A,axis=1)
=====================================================================================
"""
mydata3.txt
3
5
1.2,3,4,5,6
33,1,2,3,4
2,3.4,5.5,6,6
"""
import numpy as np
file = open("mydata3.txt")
line = file.readline()
dimX = int(line)
line = file.readline()
dimY = int(line)
lines = file.readlines()
lines = ','.join(lines)
lines = lines.replace('\n','')
data = np.fromstring(lines,sep=',',dtype=np.float64).reshape(dimX,dimY) # np.float was removed from recent numpy; np.array(lines.split(','), dtype=np.float64) also works
print(data)
=====================================================================================
import re
def line_finder():
    filename = r"C:\Users\MarketPoint\Downloads\screenlog.ko_en"
    with open(filename,'rt') as f:
        Lines = f.readlines()
    for i, l in enumerate(Lines):
        matches = re.findall("val acc epoch:", l)
        if matches:
            print((Lines[i-6].strip() + l).strip()) #### also print the line 6 lines above
=====================================================================================
# read without the trailing \n
words = open(filename, encoding='utf-8').read().splitlines() # list ['가게', '가격', '가구', '가구',...]
=====================================================================================
import numpy as np
A ={'a':[1,2,3],'b': [3,4,5,3,6,6,4]} # dict
np.save('test.npy',A)
a = np.load('test.npy',allow_pickle=True) # numpy.ndarray; recent numpy requires allow_pickle=True to load object arrays
b = a.item() # the dict that was saved. this step is what makes b the same dict as A.
=====================================================================================
A ={'a':[1,2,3],'b': [3,4,5,3,6,6,4]} # dict
np.savez('test.npz',**A) # saving this way makes it easy to restore a dict on load.
data = np.load('test.npz') # dict(data) --> converts easily to a dict, and even without converting it can be used like one.
However, if A itself contains dicts, they do not restore cleanly; when dicts are nested, saving with pickle is the better choice.
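A small sketch of that caveat (the file names are just examples): a nested dict comes back from npz wrapped in a 0-d object array, while pickle returns it directly.
import numpy as np
import pickle
A = {'a': [1,2,3], 'inner': {'x': 1}}   # 'inner' is a nested dict
np.savez('test_nested.npz', **A)
data = np.load('test_nested.npz', allow_pickle=True)
print(data['inner'])          # 0-d object array wrapping the dict
print(data['inner'].item())   # .item() is needed to recover the dict itself
with open('test_nested.pickle','wb') as f:
    pickle.dump(A, f)
with open('test_nested.pickle','rb') as f:
    print(pickle.load(f)['inner'])   # plain dict, no unwrapping needed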
=====================================================================================
X = {'a': [2,3],'b': np.array([[2,3],[4,5]])}
np.save('xx.npy',X)
Y= np.load('xx.npy',allow_pickle=True)[()]
=====================================================================================
# npy is a numpy array, while npz is closer to a dict.
a = np.load('D:\\hccho\\data\\LJ001-0001-audio.npy') # numpy.ndarray
b = np.load('D:\\hccho\\data\\LJ001-0001-mel.npy') # numpy.ndarray
c = np.load('D:\\hccho\\TACOTRON\\multi-speaker-tacotron-tensorflow-master\\datasets\\son\\data\\NB10584578.0000.npz') #numpy.lib.npyio.NpzFile
print(c.keys()) # --> ['linear', 'mel', 'tokens', 'loss_coeff', 'allow_pickle']
=====================================================================================
# read a file line by line into a list
with open('wnids.txt', 'r') as f:
    A = [x.strip() for x in f] # strip removes whitespace and '\n' from both ends.
=====================================================================================
import pandas as pd
filename = 'D:\\test.txt'
data = {}
data["review"] = []
with open(filename, "r", encoding='utf-8') as file:
    data["review"].append(file.read())
with open(filename, "r", encoding='utf-8') as file:
    A = [x.strip() for x in file]
"""
Bromwell High is a cartoon comedy.
It ran at the same time as some
other programs about school life,
such as "Teachers".
My 35 years in the teaching profession lead me to believe that Bromwell
==>
data
{'review': ['Bromwell High is a cartoon comedy.\nIt ran at the same time as some \nother programs about school life, \nsuch as "Teachers".\n My 35 years in the teaching profession lead me to believe that Bromwell\n\n\n']}
A
['Bromwell High is a cartoon comedy.',
'It ran at the same time as some',
'other programs about school life,',
'such as "Teachers".',
'My 35 years in the teaching profession lead me to believe that Bromwell',
'',
'']
=====================================================================================
# loadtxt, genfromtxt는 같은 기능이지만,
# genfromtxt는 missing data를 다룰 수 있는 다양한 옵션이 있다.
# numpy.loadtxt: equivalent function when no data is missing.
mydata = np.genfromtxt('mydata2.txt',delimiter=',',dtype=np.float32)
mydata = np.loadtxt('mydata2.txt', delimiter=',', dtype=np.float32)
=====================================================================================
import numpy as np
import pandas as pd
import datetime
"""
2005-01-03 452500 453000 446000 451000 0.015 0.000 0.350 0.30 41750 41750 41200 41200 0.015 0.000 0.350
2005-01-04 450000 451000 446000 447000 0.015 0.000 0.350 0.30 41450 41650 41000 41450 0.015 0.000 0.350
2005-01-05 440000 443000 436500 443000 0.015 0.000 0.350 0.30 41200 41500 41150 41450 0.015 0.000 0.350
2005-01-06 439000 445500 435000 435000 0.015 0.000 0.350 0.30 41650 42800 41250 42200 0.015 0.000 0.350
2005-01-07 441000 441500 435500 440500 0.015 0.000 0.350 0.30 42200 42600 41550 41550 0.015 0.000 0.350
"""
data = pd.read_csv("inputdata.dat", sep="\t",header = None ,index_col =None, names=['date','start1','high1','low1','close1','R1','Div1','Vol1','Rho','start2','high2','low2','close2','R2','Div2','Vol2'])
data['date'] = pd.to_datetime(data['date']).apply(pd.Timestamp.date)
data=data.set_index('date')
dd = datetime.date(2005,1,5) + datetime.timedelta(days=6)
b = data.loc[dd]['start1']
a = data.iloc[0] # .ix was removed from pandas; use .iloc (positional) or .loc (label)
print(b,a)
=====================================================================================
Random selection
N = 100
A = np.arange(N)
choice = np.random.choice(N,10) # with replacement (duplicates possible)
choice = np.random.choice(N,10, replace=False) # without replacement
B = A[choice]
===
a = np.random.rand(3) # unnormalized weights over 3 choices
a = a/np.sum(a) # normalize so they sum to 1 (probabilities)
# method 1
i = int(np.searchsorted(np.cumsum(a),np.random.rand(1))) # pick one index
# method 2
j = np.random.choice(len(a),1,p=a) # pick one index; the probabilities must be passed as the keyword p=
=============================================
import numpy as np
import random
from collections import deque # the collections module provides a variety of container types.
A = deque(maxlen=100)
A.append([3.5,1])
A.append([4.5,5])
A.append([5.5,2])
A.append([6.5,1])
A.append([7.5,3]) # deque([[3.5, 1], [4.5, 5], [5.5, 2], [6.5, 1], [7.5, 3]])
B = random.sample(A,2) # [[4.5, 5], [5.5, 2]]
C = np.random.choice(A,2) # ==> error. must be 1-dimensional
=============================================
Shuffle (two arrays in unison)
s = np.arange(A.shape[0])
np.random.shuffle(s)
A1 = A[s]
B1 = B[s] # B must have the same length as A along axis 0
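A runnable sketch of that unison shuffle, assuming two equal-length arrays:
import numpy as np
A = np.arange(10).reshape(5, 2)   # e.g. features
B = np.arange(5)                  # e.g. labels, paired row-for-row with A
s = np.arange(A.shape[0])
np.random.shuffle(s)              # one shared permutation
A1, B1 = A[s], B[s]               # rows of A1 and B1 stay paired
print(A1, B1)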
=====================================================================================
import numpy as np
# create an empty array with the desired shape, before any data exists
a = np.empty(0).reshape(0,3,2)
b= np.array([[[1.2,2],[3,4],[5,6]]])
c = np.concatenate((a,b),0)
=====================================================================================
import argparse
parser = argparse.ArgumentParser(description='Easy Implementation of DCGAN')
# with this formatter_class, -h (help) also shows the default argument values.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# parameters
parser.add_argument('datasets', metavar='N', type=str, nargs='+', choices=['celebA', 'lsun', 'mnist'],
                    help='name of dataset to download [celebA, lsun, mnist]') # metavar provides a different name for optional argument in help messages.
# since 'datasets' has no leading -- as above, it is positional: only the values are passed on the command line.
# nargs='+' ==> multiple arguments are collected into a list.
parser.add_argument('--filelist', type=str, default='filelist.txt')
parser.add_argument('--out_dir', type=str, default='./output', help="where to put output files")
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--batch_size2', required=True, choices=["aa","bb"]) <----- if a required=True argument is missing, argparse prints the usage and exits.
# setting a single parameter from a pair of options ====> when you want to give 'flip' either True or False....
parser.add_argument("--flip", dest="flip", action="store_true", help="flip images horizontally")
# ---> action="store_true" ==> defaults to False, whereas action="store_false" ==> defaults to True
parser.add_argument("--no_flip", dest="flip", action="store_false", help="don't flip images horizontally")
parser.set_defaults(flip=True)
===
# when you want to give train_flag either True or False....
parser.add_argument('--train', dest='train_flag', action='store_true')
parser.add_argument('--test', dest='train_flag', action='store_false')
parser.set_defaults(train_flag=False)
> python xxx.py --train ====> sets train_flag to True
> python xxx.py --test ====> sets train_flag to False
===
args = parser.parse_args()
filelist_dir = args.filelist
output_dir = args.out_dir
total_epoch = args.epochs
batch_size = args.batch_size
print(filelist_dir, output_dir, total_epoch,batch_size )
> python zzz.py -h ==> prints the help
=====================================================================================
# lbfgs defaults to True; passing --adam as an argument makes lbfgs False
parser.add_argument("--adam",dest='lbfgs',help="True=lbfgs, False=Adam", action="store_false")
parser.set_defaults(lbfgs=True)
=====================================================================================
import argparse
parser=argparse.ArgumentParser()
parser.add_argument('-auto', action='store_false', )
parser.add_argument("--max_epochs", type=int, default=100, help="number of training epochs")
args=parser.parse_args()
print(args)
print(vars(args)) # convert to a dict with the builtin vars
==========================================================
Both of the following forms work.
>python xxx.py --batch_size=32
>python xxx.py --batch_size 32
=====================================================================================
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,help='number of training steps')
#args = parser.parse_args()
args = parser.parse_args(sys.argv[1:]) # cannot add new arguments here; can only update already-defined ones
=====================================================================================
parser = argparse.ArgumentParser()
args = parser.parse_args()
args.num_hidden = 10
args.num_epoch = 20
print(args)
=====================================================================================
flags = tf.app.flags
This plays the same role as the argparse module. Its advantage over argparse, in my view, is that there is no need to import argparse at all; just add lines like the following. (tf.app.flags is TF 1.x; it was removed in TF 2.x.)
flags = tf.app.flags
flags.DEFINE_integer("epoch", 25, "Epoch to train [25]")
flags.DEFINE_string("sample_dir", "samples", "Directory name to save the image samples [samples]")
flags.DEFINE_boolean("is_crop", False, "True for training, False for testing [False]")
FLAGS = flags.FLAGS
if not os.path.exists(FLAGS.checkpoint_dir): # or: os.makedirs(FLAGS.checkpoint_dir,exist_ok=True)
    os.makedirs(FLAGS.checkpoint_dir)
Storing the object in flags via tf.app.flags and then calling DEFINE_integer or DEFINE_string plays the same role as argparse above.
=====================================================================================
Examples = collections.namedtuple("Examples", "paths, inputs, targets, count, steps_per_epoch")
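A quick usage sketch of that namedtuple (the field values are placeholders):
import collections
Examples = collections.namedtuple("Examples", "paths, inputs, targets, count, steps_per_epoch")
ex = Examples(paths=['a.png'], inputs=None, targets=None, count=1, steps_per_epoch=1)
print(ex.count, ex.paths)   # fields read like attributes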
=====================================================================================
Reading images
img = skimage.io.imread(path) ==> integer data
resized_img = skimage.transform.resize(crop_img, (img_size, img_size)) # returns float even when an integer image is passed in. preserve_range=True keeps the original value range (dtype is still float). order selects the interpolation: 0: Nearest-neighbor 1: Bi-linear (default) 2: Bi-quadratic 3: Bi-cubic 4: Bi-quartic 5: Bi-quintic
skimage.io.imshow(img)
plt.show()
======
im = scipy.misc.imread ==> integer data
im = scipy.misc.imresize(im, self.resize) # returns integer values. ----> deprecated and removed from SciPy ---> use skimage.transform.resize instead.
scipy.misc.imsave(save_path, im)
=====================================================================================
from scipy.misc import imread, imresize ---> imread and imresize are deprecated.
scipy.misc.imread ---> integer uint8 ---> use "from imageio import imread" instead.
scipy.misc.imresize ---> integer uint8 ---> use PIL: wrap the array with Image.fromarray, resize, then convert back with np.array ---> uint8
from PIL import Image
np.array(Image.fromarray(img).resize((10,10))) # img here: a uint8 numpy array
=====================================================================================
import os, glob
a = os.path.join("../tensorflow-style-transfer-master","images", "*.jpg") # join the strings into a path.
b = glob.glob(a) # expand the wildcard into a list of filenames.
files = glob.glob(os.path.join(base_path,'*','*.wav')) # files in every subdirectory exactly one level down.
files = glob.glob(os.path.join(base_path,'**','*.wav'), recursive=True) # files in all subdirectories at any depth; '**' requires recursive=True.
=====================================================================================
input_dir = "D:\\hccho\\CycleGAN-TensorFlow-master\\data\\apple2orange\\testA"
x = os.scandir(input_dir)
for i in x:
    print(i.path)
=====================================================================================
import gzip,os
import numpy as np
import skimage.io
import scipy.misc
import matplotlib.pyplot as plt
data_dir = r'D:\hccho\ML\PythonCode\CommonDataset\mnist'
fd = os.path.join(data_dir,'train-images-idx3-ubyte.gz')
with gzip.open(fd, 'rb') as f:
    loaded = np.frombuffer(f.read(), np.uint8, offset=16)
trX = loaded.reshape((60000,28,28,1)).astype(np.float64) # np.float was removed from recent numpy
skimage.io.imshow(np.squeeze(trX[0]/255.0))
plt.show()
skimage.io.imshow(np.squeeze((trX[0]/255.0 +1)/2))
plt.show()
plt.imshow(np.squeeze(trX[0]/255.0))
plt.show()
plt.imshow(np.squeeze((trX[0]/255.0 +1)/2))
#scipy.misc.imsave("a.jpg",np.squeeze(trX[0]/255.0)) # 0~1
#scipy.misc.imsave("b.jpg",np.squeeze((trX[0]/255.0 +1)/2)) 0.5~1
img1 = skimage.io.imread("a.jpg")
img2 = skimage.io.imread("b.jpg")
=====================================================================================
from PIL import Image
import os, glob
#image_files = glob.glob("./*.png")
image_files = glob.glob("*.png")
print(image_files)
images = [Image.open(f) for f in image_files] # ----> for natural filename ordering, natsorted is needed: from natsort import natsorted
images = images*10 # repeat the frames
gif = images[0]
gif.save(fp="result.gif", format='gif', save_all=True, append_images=images[1:],duration=800) # duration 지정
print(Image.open("result.gif").n_frames)
#########################################
# imageio로 gif 만들기
import imageio
def make_gif_from_train_plots(fname: str) -> None:
    png_dir = "train_plots/"
    images = []
    sort = sorted(os.listdir(png_dir))
    for file_name in sort[1::1]:
        if file_name.endswith(".png"):
            file_path = os.path.join(png_dir, file_name)
            images.append(imageio.imread(file_path))
    imageio.mimsave("gifs/" + fname, images, duration=0.05)
#########################################
# jupyter notebook에서 gif 보여주기
from IPython.display import display, Image as Image2 # renamed because the name clashes with PIL's Image
with open('result.gif','rb') as f:
    display(Image2(data=f.read(), format='png'))
#######################################
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
filename = 'black-white.jpg' # loading it gives (H,W,C)
img = Image.open(filename).convert('L') # 'L' converts to grayscale; use convert('RGB') for color
img2 = cv2.applyColorMap(np.array(img),cv2.COLORMAP_JET)
##########################################
### convert a video to gif
from moviepy.editor import *
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
VideoFileClip('D:/hccho/RL/ReinforcementZeroToAll-master/movie/openaigym.video.0.11248.video000000.mp4').speedx(4).write_gif('out.gif')
# VideoFileClip(filename).write_videofile("xxx.mp4") # save as
#ffmpeg_extract_subclip(filename, 0, 60*2+20, targetname="xxx.mp4") # extract part of the clip, in seconds
=====================================================================================
import skimage.io
from tensorflow.examples.tutorials.mnist import input_data # ---> gone in TF 2.x.
mnist = input_data.read_data_sets('D:\hccho\CommonDataset\mnist', one_hot=True)
inputs = mnist.train.images
labels = mnist.train.labels
n = 14
skimage.io.imshow(inputs[n].reshape(28,28))
print(labels[n])
=====================================================================================
# MNIST provided by sklearn (its own format) ---> ~30s when downloading through sklearn, 17~20s from an already-downloaded cache
import time
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
s_time = time.time()
mnist = fetch_openml('mnist_784',version=1,data_home=r'D:\hccho\CommonDataset\mnist\scikit_learn_data') # 14M. data_home=None ---> goes under C:\Users\BRAIN\scikit_learn_data
print(mnist.keys())
print(mnist['data'].shape, mnist['target'].shape) # 'data' and 'target' are numpy arrays here (newer sklearn returns a DataFrame unless as_frame=False)
print(mnist['data'][0], mnist['target'][0])
plt.imshow(mnist['data'][0].reshape(28,28),cmap='binary') # cmap='binary' renders it in black and white
plt.show()
print(time.time()-s_time)
=====================================================================================
# read image files and display them
x = ['a.jpg','b.jpg','c.jpg','d.jpg']
img = imread(x[1])
plt.subplot(2,1,1)
plt.imshow(img)
img = imread(x[2])
plt.subplot(2,1,2)
plt.imshow(img)
plt.show()
=====================================================================================
import matplotlib.pyplot as plt
from PIL import Image
img = Image.open('/home/hccho2/datasets/mj_evaluation/000000-완고스레.jpg')
plt.imshow(img)
plt.show()
=====================================================================================
def change_image_format():
    import os, glob
    from PIL import Image
    a = os.path.join("D:\hccho\StackGAN-hccho\StcakGAN-Result", "*.png") # join the strings into a path.
    b = glob.glob(a) # expand the wildcard into a list of filenames.
    for f in b:
        image = Image.open(f)
        image.save(f[:f.find('png')] + 'jpg')
=====================================================================================
# numpy image concat
# a: shape = (5, 256, 256, 3)
np.concatenate(a,axis=0) ==> (1280, 256, 3) <---- a[0],a[1],a[2],a[3],a[4] are concatenated along axis 0.
np.concatenate(a,axis=1) ==> (256, 1280, 3)
np.concatenate(a,axis=2) ==> (256, 256, 15)
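A quick shape check of the three cases above (zeros stand in for real images):
import numpy as np
a = np.zeros((5, 256, 256, 3))           # 5 images of shape (256, 256, 3)
print(np.concatenate(a, axis=0).shape)   # (1280, 256, 3): stacked vertically
print(np.concatenate(a, axis=1).shape)   # (256, 1280, 3): stacked horizontally
print(np.concatenate(a, axis=2).shape)   # (256, 256, 15): stacked along channels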
=====================================================================================
img_path = "cat.jpeg"
im = plt.imread(img_path)
h,w,c = im.shape
h1 = w1 = 512
img_raw = tf.io.read_file(img_path)
img_tensor = tf.reshape(tf.image.decode_image(img_raw),im.shape)
img_upsampled = tf.image.resize_images(img_tensor,size=(h1,w1),method=1) #BILINEAR = 0, NEAREST_NEIGHBOR = 1, BICUBIC = 2, AREA = 3
sess = tf.Session()
x = sess.run(img_tensor)
x_upsampled = sess.run(img_upsampled)
fig = plt.figure()
fig.suptitle("Upsampling", fontsize=16)
ax = plt.subplot("121")
ax.set_title("Origin")
ax.imshow(x,extent=[0,w,0,h])
ax.set_aspect('equal',anchor ='SW')
ax = plt.subplot("122")
ax.set_title("Resize")
ax.imshow(x_upsampled,extent=[0,h1,0,w1])
ax.set_aspect('equal',anchor ='SW')
plt.show()
=====================================================================================
print(os.listdir("../")) <--- all directories + files one level down -----> for every file and directory at any depth, use os.walk
[o for o in os.listdir(d) if os.path.isdir(os.path.join(d,o))] # ===> directories only (files excluded)
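A short sketch of the os.walk alternative mentioned above (the root path is just an example):
import os
for root, dirs, files in os.walk('..'):   # visits every directory recursively
    for name in files:
        print(os.path.join(root, name))   # full path of each file at any depth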
=====================================================================================
# multiply all the elements of a list
from functools import reduce
A = [2,3,5]
result = reduce(lambda x, y: x * y, A)
print(result)
# np.prod is even simpler.
np.prod(A)
=====================================================================================