-
Notifications
You must be signed in to change notification settings - Fork 1
/
example-rss.xml
601 lines (601 loc) · 293 KB
/
example-rss.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/" xmlns:blogger="http://schemas.google.com/blogger/2008" xmlns:georss="http://www.georss.org/georss" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0">
<!-- Feed-level metadata: identity, last update time, the blog's category terms, titles, links, author and OpenSearch paging values. -->
<id>tag:blogger.com,1999:blog-3867310391951630980</id>
<updated>2013-02-19T17:50:39.879-08:00</updated>
<!-- Category terms declared at feed level; entries reference them by term. -->
<category term="Chart Formatting"/>
<category term="Twitter"/>
<category term="GeoLocation"/>
<category term="World Bank Data"/>
<category term="Learning Statistics"/>
<category term="MySQL"/>
<category term="SQL"/>
<category term="Guardian Data"/>
<category term="RODM"/>
<category term="Statistics"/>
<category term="Date"/>
<category term="Economics"/>
<category term="Data Mining"/>
<category term="Spreadsheet"/>
<category term="fractals"/>
<category term="music"/>
<category term="XML"/>
<category term="ggplot2"/>
<category term="Number Formatting"/>
<category term="Oracle"/>
<category term="Google"/>
<category term="Strings"/>
<category term="RODBC"/>
<category term="BP Oil Spill"/>
<category term="igraph"/>
<category term="Book Reviews"/>
<category term="Color"/>
<category term="Maps"/>
<category term="GitHub"/>
<category term="Conferences"/>
<category term="Financial"/>
<category term="Programming Languages"/>
<category term="quantmod"/>
<category term="iPhone"/>
<category term="R-Chart"/>
<category term="Ruby"/>
<category term="R Environment"/>
<category term="gradesquare"/>
<category term="R Packages"/>
<category term="Time"/>
<category term="Cramer"/>
<category term="JSON"/>
<category term="Developing R Programs"/>
<category term="fitness"/>
<category term="Data Preparation"/>
<title type="text">R-Chart</title>
<subtitle type="html">The R language as experienced by a web application/database developer...</subtitle>
<!-- Feed links. The rel="next" link carries a query string; the ampersand between its parameters has to be written as an entity reference inside the attribute value. -->
<link rel="http://schemas.google.com/g/2005#feed" type="application/atom+xml" href="http://www.r-chart.com/feeds/posts/default"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/"/>
<link rel="hub" href="http://pubsubhubbub.appspot.com/"/>
<link rel="next" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default?start-index=26&amp;max-results=25"/>
<!-- NOTE(review): the email value below looks like a redaction placeholder rather than a real address; confirm against the upstream feed before relying on it. -->
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<generator version="7.00" uri="http://www.blogger.com">Blogger</generator>
<!-- Paging: 80 results in total; this page starts at index 1 and holds up to 25 items. -->
<openSearch:totalResults>80</openSearch:totalResults>
<openSearch:startIndex>1</openSearch:startIndex>
<openSearch:itemsPerPage>25</openSearch:itemsPerPage>
<!-- Post "Github Follower Graph with R", published 2012-05-17. -->
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1451722197159796986</id>
<published>2012-05-17T11:30:00.000-07:00</published>
<updated>2012-05-17T11:30:01.543-07:00</updated>
<title type="text">Github Follower Graph with R</title>
<!-- type="html" content must be character data with no child elements; the CDATA section carries the HTML markup and its HTML-only entities as plain text. -->
<content type="html"><![CDATA[Graph a github user's followers (and follower's followers).<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/-uj_s72aE-1s/T7VC-CxoRyI/AAAAAAAAAqI/qm82nDdMf64/s1600/github_followers.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="443" src="http://3.bp.blogspot.com/-uj_s72aE-1s/T7VC-CxoRyI/AAAAAAAAAqI/qm82nDdMf64/s640/github_followers.png" width="640" /></a></div><br /><br />Each programming language tends to develop its own idiomatic set of data structures.&nbsp; In R, data frames are often the structure of choice.&nbsp; JSON (a subset of Javascript) has emerged as an open standard for data interchange that has largely displaced XML for many web based APIs.&nbsp;&nbsp; This post gives an example of how to read JSON from GitHub, manipulate the data within R and produce a graph of the results (like the one above).<br /><br />A few standard R libraries are required:<br /><ul><li><a href="http://cran.r-project.org/web/packages/RCurl/index.html">RCurl</a> to retrieve JSON data (or anything else) from a URL</li><li><a href="http://cran.r-project.org/web/packages/rjson/index.html">rjson</a> to to parse the JSON data</li></ul><br />In this particular <a href="http://developer.github.com/v3/users/followers/">API call</a> (<a href="https://api.github.com/users/yegg/followers">an example of json returned</a> - no API key is required for the Github API), the JSON data will represent a GitHub user's (first thirty) followers.&nbsp; This implies the use of a graph to represent the data so the <a href="http://cran.r-project.org/web/packages/igraph/index.html">igraph</a> library will be used as well.&nbsp;<br /><br /><br /><br /><br />With the RCurl and rjson libraries available, the json results can be retrieved and converted to an R list as follows:<br /><br /><span style="font-size: x-small;"><span style="font-family: &quot;Courier 
New&quot;,Courier,monospace;">o &lt;- fromJSON(getURL('https://api.github.com/users/EzGraphs/followers'))</span></span><br /><br /><br /><br /><br /><br />You can check the class for yourself using class(o) and view the length of the list using length(o)&nbsp; <br />&nbsp;&nbsp;&nbsp; <br />To convert the results to the data frame where rows represent followers and columns represent attributes, unlist the results, transpose the results and cast as a data frame:<br /><br /><span style="font-family: &quot;Courier New&quot;,Courier,monospace; font-size: x-small;">df &lt;- as.data.frame(t(sapply(o, unlist)))</span><br /><br /><br /><br />That is the basic process - the <a href="https://github.com/ezgraphs/R-Programs/blob/master/github_followers.R">rest of the code</a> is the details related to getting the data into the iGraph object which can then be rendered using plot, tkplot (show above) or rglplot.&nbsp; <br />]]></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1451722197159796986/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2012/05/github-follower-graph-with-r.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1451722197159796986"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1451722197159796986"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2012/05/github-follower-graph-with-r.html" title="Github Follower Graph with R"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/-uj_s72aE-1s/T7VC-CxoRyI/AAAAAAAAAqI/qm82nDdMf64/s72-c/github_followers.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<!-- Post "Please Learn to Read", published 2012-05-17. -->
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1813598398331387516</id>
<published>2012-05-17T06:45:00.002-07:00</published>
<updated>2012-05-17T06:47:01.656-07:00</updated>
<title type="text">Please Learn to Read</title>
<!-- type="html" content must be character data with no child elements; the CDATA section carries the HTML markup and its HTML-only entities as plain text. -->
<content type="html"><![CDATA[There has been a lot of <a href="http://hn4d.com/fifty_top_posts">chatter during the past week</a> on HN generated by with Jeff Atwood's "Please don't learn to code".&nbsp; Actual posts included:<br /><ul><li><a href="http://www.codinghorror.com/blog/2012/05/please-dont-learn-to-code.html">Please don't learn to code</a> (www.codinghorror.com)&nbsp;</li><li><a href="http://learncodethehardway.org/blog/MAY_15_2012.html">Please Don't Become Anything, Especially Not A Programmer</a> (learncodethehardway.org)</li><li><a href="http://sachagreif.com/please-learn-to-code/">Please Learn to Code</a>(sachagreif.com)&nbsp;</li><li><a href="http://www.randsinrepose.com/archives/2012/05/16/please_learn_to_write.html">Please learn to write</a> (www.randsinrepose.com)&nbsp;</li></ul>Each post had a relatively high number of comments associated with them as well. <br /><br />Granted, Jeff selected a provocative headline that he knew would generate a response. Call it link-bait if you will, but his actual article was pretty well reasoned. A number of the responses and comments suggested that Jeff was promoting some sort of elitist snobbery where the programmer guild was restricting entrance to the uninitiated. The entire reaction by some was predicated by the title of the article alone. Which is why I am going to ask:<br /><br /><h2> Please Learn to Read&nbsp;</h2>If you are going to react - and react strongly - to an article, be responsible and actually read what the author is saying. Creating a strawman to kick around is great entertainment. If your goal is to entertain, have at it I suppose. If your goal is to inform, as most of the people posting and commenting were, why not respond to the actual content of the author's argument?<br /><br />One of the tremendous values that the HN community has demonstrated over time is a certain objectivity and critical analysis that is not widely available elsewhere. 
Granted, we all have our biases, but ad-hominem assaults and polarizing characterizations don't promote better understanding of a subject. They tend to shut down substantial conversation and the discourse devolves into a virtual shouting match.<br /><br />Active engagement in a debate is a good thing. Again, the HN community allows for interactive participation in a way that is somewhat unique. It is something worth preserving. It is worth a few minutes of time before firing of a response that indicates the author you are opposing is mindless and malicious. In most cases, he is not - if the post made to the front page of HN chances are that it has been vetted by a few folks and introduces some ideas worth considering.<br /><br />It seems to have become a requirement to title articles in a way that will get them initial attention.&nbsp; This sound-byte mentality is perhaps unfortunate, but to be expected in our day where we filter relevant and interesting information so quickly.&nbsp; It is often worth critiquing the selected title on its own merits or for not associated being with the actual content. &nbsp; It is a different thing to read the title of an article, infer the content of the article based upon this alone (or a cursory reading) and responding on this basis alone.<br /><br />Of course there are qualifications qualifications to what has been stated above... 
much of the conversation was civil and intelligent, some posts might promote ideas that demand a stronger response, there are links to materials immature, misinformed or downright evil.&nbsp; But there has been a trend recently that became evident enough in this last set of posts to motivate me to say something on the subject.<br /><br />I expect that there will be responses to this post indicating that I question the intelligence of the folks who responded and that I am inferring that they are illiterate (<i><u><b>I am not</b></u></i> - many of these folks are downright brilliant in terms of brute intelligence).&nbsp; It is not a question of intelligence.&nbsp; It is a matter of discipline and character.&nbsp; It is a challenge use restraint when responding to another. &nbsp; It is easier to simply allow one's initial reaction to a headline to result in a response that is at best confusing and counterproductive and at worst hurtful.<br /><br />What is easier is not always what is better.<br /><br />]]></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1813598398331387516/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2012/05/please-learn-to-read.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1813598398331387516"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1813598398331387516"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2012/05/please-learn-to-read.html" title="Please Learn to Read"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>0</thr:total>
</entry>
<!-- Post "Log File Analysis with R", published 2012-02-21; tagged ggplot2 and gradesquare. -->
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-3427234779316482700</id>
<published>2012-02-21T17:56:00.000-08:00</published>
<updated>2012-02-23T03:09:14.757-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="gradesquare"/>
<title type="text">Log File Analysis with R</title>
<!-- type="html" content must be character data with no child elements; the CDATA section carries the HTML markup and its HTML-only entities as plain text. -->
<content type="html"><![CDATA[<br /><div style="text-align: center;"><a href="http://www.gradesquare.com/"><img height="60" src="http://gradesquare.com/css/images/logo.png" width="400" /></a>&nbsp;</div><br /><span style="font-family: inherit;"><span style="background-color: white;">R often comes up in discussions of heavy duty scientific and statistical analysis (and so it should).&nbsp; However, it is also incredibly handy for a variety of more routine developer activities.&nbsp;&nbsp; And so I give you… log file analysis with R! &nbsp;</span><br style="background-color: white;" /><br style="background-color: white;" /><span style="background-color: white;">I was just involved in the launch of&nbsp;</span><a href="http://gradesquare.com/" rel="nofollow" style="background-color: white; color: #234786; outline-color: initial; outline-style: initial; outline-width: 0px;" target="_blank"><span class="yshortcuts" id="lw_1329867226_2" style="color: #366388; cursor: pointer;">gradesquare.com</span></a><span style="background-color: white;">&nbsp;(go ahead – click on the link and check it out.&nbsp; We will still be here later!).&nbsp; With the flurry of recent activity, I needed a way to visualize and communicate site activity to the rest of the team.&nbsp; It only takes a few lines of R to read in a log file (of a reasonable size), format the data, and generate some usable charts. &nbsp;Like most good ideas - it is <a href="http://stackoverflow.com/questions/5664997/logfile-analysis-in-r">not new</a>. &nbsp;Most log files follow a similar format (such as <a href="https://en.wikipedia.org/wiki/Common_Log_Format">common log format</a>)&nbsp;</span><span style="background-color: white;">so there may be some minor variations to the following exercise.</span></span><br /><span style="font-family: inherit;">The only library that I used for this example was ggplot2 for charts. 
&nbsp;</span><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>library(ggplot2)</b></span><br /><br /><span style="font-size: large;"><b>Read the Log File</b></span><br /><span style="font-family: inherit;">A sample of the log file (miserably wrapped - my apologies):</span><br /><span style="font-family: inherit;"><br /></span><br /><span style="font-size: xx-small;"><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;">66.12.71.25 - - [21/Feb/2012 23:44:11] "GET /course/1894/detail HTTP/1.1" 200 7017 5.0829</span><br style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;" /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;">66.12.71.21 - - [21/Feb/2012 23:44:39] "GET /search_by_author?search_learn_exp=Khan+Academy&amp;page=193 HTTP/1.1" 200 8019 0.3288</span><br style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;" /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;">66.12.71.25 - - [21/Feb/2012 23:45:21] "GET /course/19/detail HTTP/1.1" 200 6851 0.1213</span><br style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;" /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;">18.4.5.14 - - [21/Feb/2012 23:45:59] "GET /search_by_subject?search_learn_exp=algebra-i-worked-examples HTTP/1.1" 200 7939 0.0370</span></span><br /><span style="font-family: inherit;"><br /></span><br />If you can't make that out - just know that it is a relatively typical log file that includes the IP address of the client request, the date and time, the HTTP method and URL path, the HTTP request status code, a count of bytes returned and the time required for the request to 
process.<br /><br /><span style="font-family: inherit;"><br /></span><br /><span style="font-family: inherit;">The log file can be read into a data frame as follows</span><span style="font-family: inherit;">.</span><br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>df = read.table('webapp.log')</b></span><br /><br />There are a lot of&nbsp;different options available - and you might want to take advantage of these to minimize the amount of additional cleanup required after loading the file. &nbsp;For details:<br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>help(read.table)</b></span><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif; font-size: 16px;"><br /></span><br /><span style="font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><br /></span><br /><span style="font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><br /></span><br /><span style="font-family: inherit;"><span style="font-family: inherit; font-size: large;"><b>Clean Up and Format</b></span>&nbsp;</span><br /><span style="font-family: inherit;">I chose to clean up manually after the fact. &nbsp;To start, we name the columns in the data frame.</span><br /><span style="font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><br /></span><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>colnames(df)=c('host','ident','authuser','date','time','request','status','bytes','duration')</b></span><br /><br /><br />The date and time were split up when read in above. 
&nbsp;I am not concerned with the time at this point but do want the date to be cast to a date type.<br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>df$date=as.Date(df$date,"[%d/%b/%Y")</b></span><br /><br /><br /><span style="font-family: inherit;">To see the column names and first few rows of our data frame...</span><br /><span style="background-color: white;"><span style="font-family: 'Courier New', Courier, monospace;"><b>head(df)</b></span></span><br /><br />There are a number of different ways of getting a quick handle on the data - you could do a summary for instance. &nbsp;One item that you might want to have is a the number of requests for HTTP status.<br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>table(df$status)</b></span><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif; font-size: 16px;">&nbsp;</span><br /><br />But the item of immediate interest is simply the number of requests. &nbsp;The following will provide the number of requests by date.<br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>reqs=as.data.frame(table(df$date))</b></span><br /><br />R is really great for these quick summarizations, and if you memorize a few functions you will be able to address most needs easily. &nbsp;At a certain point, I can better visualize data problems using SQL, and so use the sqldf library. 
&nbsp;For now - on to some charts using ggplot2.<br /><br /><span style="font-size: large;"><b>Make Some Charts</b></span><br /><br /><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/-bJsu_YOIHcQ/T0RGVGyFcXI/AAAAAAAAApg/iacH2g9ewWQ/s1600/TrafficToSite.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://1.bp.blogspot.com/-bJsu_YOIHcQ/T0RGVGyFcXI/AAAAAAAAApg/iacH2g9ewWQ/s400/TrafficToSite.png" width="400" /></a></div>One "gotcha" that I hit fairly often with R and ggplot2 is the need to cast variables in a way that allows them to be treated as either continuous or discrete. &nbsp;In the following casting the Var1 field as a Date allows it to be treated as continuous and geom_line() renders a line as intended.<br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>ggplot(data=reqs, aes(x=as.Date(Var1), y=Freq)) + geom_line() + xlab('Date') + ylab('Requests') + opts(title='Traffic to Site')</b></span><br /><br /><br /><br /><br />On the other hand, the format function is used in this example to cause the (http) status value to be treated as discrete.<br /><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif;"><b>ggplot(data=df, aes(x=format(status))) + geom_bar() + xlab('Status') + ylab('Count') + opts(title='Status')</b></span><br /><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif; font-size: 16px;"><br /></span><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/-hqT0BxkjTrI/T0RGII2ApVI/AAAAAAAAApY/U5kPIVnyazY/s1600/HTTP_Status.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" 
src="http://1.bp.blogspot.com/-hqT0BxkjTrI/T0RGII2ApVI/AAAAAAAAApY/U5kPIVnyazY/s400/HTTP_Status.png" width="400" /></a></div><span style="font-family: inherit;">By the way, the images were exported as pngs for the blog by assigning the chart to a variable p and printing like so:</span><br /><span style="font-family: inherit;"><br /></span><br /><br /><span style="font-family: 'Courier New', Courier, monospace;"><b>png("imagename.png")</b></span><br /><span style="font-family: 'Courier New', Courier, monospace;"><b>print(p)</b></span><br /><span style="font-family: 'Courier New', Courier, monospace;"><b>dev.off()</b></span><br /><div style="font-family: inherit;"><br /></div><div style="font-family: inherit;">So there you have it - functional, useful R that addresses a practical every day need of web developers. &nbsp;It is also a great, practical task that can introduce you to R with a simple relevant exercise that provides immediate value.</div><div style="font-family: inherit;"><br /></div><div style="font-family: inherit;">The next time Google Analytics falls short, pull out R and give it a try!</div><br /><div style="text-align: justify;"><br /></div><span style="background-color: white; font-family: 'Courier New', courier, monaco, monospace, sans-serif; font-size: 16px;"><br /></span>]]></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/3427234779316482700/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2012/02/log-file-analysis-with-r.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/3427234779316482700"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/3427234779316482700"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2012/02/log-file-analysis-with-r.html" title="Log File Analysis with R"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-bJsu_YOIHcQ/T0RGVGyFcXI/AAAAAAAAApg/iacH2g9ewWQ/s72-c/TrafficToSite.png" height="72" width="72"/>
<thr:total>1</thr:total>
</entry>
<!-- Post "Word Cloud from Blog RSS", published 2011-09-02. -->
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-6837203126169082016</id>
<published>2011-09-02T03:37:00.000-07:00</published>
<updated>2011-09-02T03:37:27.646-07:00</updated>
<title type="text">Word Cloud from Blog RSS</title>
<!-- type="html" content must be character data with no child elements; the CDATA section carries the HTML markup and its HTML-only entities as plain text. -->
<content type="html"><![CDATA[<a href="http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s1600/r-chart-cloud.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="271" src="http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s400/r-chart-cloud.png" width="400" /></a><br /><br />Crazy busy &nbsp;- no time to blog recently. Time enough for pretty pictures based upon previous words though...(thanks&nbsp;<a href="http://www.wordle.net/">http://www.wordle.net</a>). <br /><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/-scOSv1sYTNk/TmCxdTZQcRI/AAAAAAAAAok/XfQS64uUVgo/s1600/r-chart-cloud_tree.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="212" src="http://2.bp.blogspot.com/-scOSv1sYTNk/TmCxdTZQcRI/AAAAAAAAAok/XfQS64uUVgo/s320/r-chart-cloud_tree.png" width="320" /></a></div><span class="Apple-style-span" style="font-size: large;"><br /></span><br /><br /><div class="separator" style="clear: both; text-align: center;"></div>]]></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/6837203126169082016/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2011/09/word-cloud-from-blog-rss.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6837203126169082016"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6837203126169082016"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2011/09/word-cloud-from-blog-rss.html" title="Word Cloud from Blog RSS"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/-iTSzxNlw4V0/TmCwKljIh0I/AAAAAAAAAoc/gXMjeFtBsLs/s72-c/r-chart-cloud.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1594521702624711592</id>
<published>2011-01-01T12:43:00.000-08:00</published>
<updated>2011-01-01T12:43:17.232-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="Statistics"/>
<category scheme="http://www.blogger.com/atom/ns#" term="fractals"/>
<category scheme="http://www.blogger.com/atom/ns#" term="World Bank Data"/>
<title type="text">Ten News Stories of 2010 - and the Statistics that Made Them</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://www.statsoft.com/Portals/0/blog/World%20Statistics%20Day.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" src="http://www.statsoft.com/Portals/0/blog/World%20Statistics%20Day.jpg" /></a></div><span class="Apple-style-span" style="font-size: large;">Significance Magazine</span><br />According to&nbsp;<a href="http://www.significancemagazine.org/view/index.html">Significance&nbsp;Magazine</a>&nbsp;(jointly published by&nbsp;<a href="http://www.rss.org.uk/main.asp?page=0">Royal Statistical Society</a>&nbsp;and&nbsp;the&nbsp;<a href="http://www.amstat.org/">American Statistical Association</a>) the following are the top ten stories of 2010.<br /><br /><b>1. Progress in the prevention of HIV: &nbsp; &nbsp;<span class="Apple-style-span" style="font-weight: normal;">Public health studies result in HIV treatment advancements.</span></b><br /><b>2. Drug regulation: restrictions and retractions: &nbsp;&nbsp;<span class="Apple-style-span" style="font-weight: normal;">Related to&nbsp;breast cancer and type 2 diabetes.</span></b><br /><b>3. Measuring a teacher's value: &nbsp;<span class="Apple-style-span" style="font-weight: normal;"><a href="http://projects.latimes.com/value-added/">LA Times</a> graded teachers based on standards tests results.&nbsp;</span></b><br /><b>4. Political rhetoric finds a helpmeet in statistics:</b>&nbsp;&nbsp;&nbsp;"a statistical recovery and a human recession."<br /><b>5. Census of Marine Life:&nbsp;<span class="Apple-style-span" style="font-weight: normal;">The <a href="http://www.plosone.org/article/info:doi/10.1371/journal.pone.0012110">first census of the world's seas</a> completed in 2010.</span></b><br /><b>6. 
<a href="http://www.nytimes.com/2010/09/24/business/24jelinek.html?_r=1&amp;scp=2&amp;sq=Jelinek&amp;st=cse">Death</a> of <a href="http://www.clsp.jhu.edu/~jelinek/">Frederick Jelinek</a>: &nbsp;<span class="Apple-style-span" style="font-weight: normal;">a pioneer in speech recognition and statistical methods of NLP.</span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><b>7. The genetic key to Shangri-La: &nbsp;<span class="Apple-style-span" style="font-weight: normal;">Dr. Paola Sebastiani genetics advancements related to longevity.</span></b></span></b><br /><b>8. Screening saves:&nbsp;<span class="Apple-style-span" style="font-weight: normal;">CT Scanning&nbsp;definitively&nbsp;associated with a reduced risk of lung cancer mortality.</span></b><br /><b>9. Fat kills:&nbsp;<span class="Apple-style-span" style="font-weight: normal;"><span class="apple-style-span"><span style="color: black;">Quantitative reviews in various areas of health and nutrition.</span></span></span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><span class="apple-style-span"><span style="color: black;"><b>10. 
Words, words words:&nbsp;<span class="Apple-style-span" style="font-weight: normal;"><a href="http://www.culturomics.org/">Culturomics</a>&nbsp;project produces the <a href="http://ngrams.googlelabs.com/">Google Ngram Viewer</a>.</span></b></span></span></span></b><br /><br /><br />The details are parceled out in 5 articles:&nbsp;<a href="http://www.significancemagazine.org/details/webexclusive/971759/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-1.html">Part 1</a>&nbsp;|&nbsp;<a href="http://www.significancemagazine.org/details/webexclusive/971905/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-2.html">Part 2</a>&nbsp;&nbsp;|&nbsp;<a href="http://www.significancemagazine.org/details/webexclusive/972091/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-3.html">Part 3</a>&nbsp;|&nbsp;<a href="http://ten%20news%20stories%20of%202010%20-%20and%20the%20statistics%20that%20made%20them.%20part%204/">Part 4</a>&nbsp;|&nbsp;<a href="http://www.significancemagazine.org/details/webexclusive/972741/Ten-News-Stories-of-2010---and-the-Statistics-that-Made-Them_--Part-5.html">Part 5</a><br /><br /><br /><iframe align="left" frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;o=1&amp;p=8&amp;l=bpl&amp;asins=1148902732&amp;fc1=000000&amp;IS2=1&amp;lt1=_blank&amp;m=amazon&amp;lc1=0000FF&amp;bc1=000000&amp;bg1=FFFFFF&amp;f=ifr" style="align: left; height: 245px; padding-right: 10px; padding-top: 5px; width: 131px;"></iframe><br /><b><span class="Apple-style-span" style="font-size: large;">Other Stories - or my $0.02.</span></b><br />The following are not exactly in the same category as the listings in significance magazine - but they involve personalities and events that affect many members of the R community and have some sort of analytical/statistical&nbsp;significance.<br /><br /><b><a href="http://unstats.un.org/unsd/wsd/Default.aspx">World Statistics 
Day</a></b><br />I mean, I missed picking up a greeting card - but the objective of the celebration is pretty worthwhile:<br /><br /><span class="Apple-style-span" style="font-family: Calibri, Helvetica, Arial; font-size: 14px; font-style: italic; line-height: 16px;">building support and better understanding for official statistics among the general public and the policy-makers worldwide.</span><br /><br /><b><a href="http://www.r-bloggers.com/">R-Bloggers</a></b><br /><b><span class="Apple-style-span" style="font-weight: normal;">For the R community, R-Bloggers has had a <a href="http://www.r-statistics.com/2011/01/r-bloggers-in-2010-top-14-r-posts-site-statistics-and-invitation-for-sponsors/">banner year</a> and provided a great deal of visibility for the R community. &nbsp;They are looking for <a href="http://www.r-statistics.com/2011/01/r-bloggers-in-2010-top-14-r-posts-site-statistics-and-invitation-for-sponsors/">sponsorship</a> - so please consider supporting them.</span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b><br /><b>U.S. Economic News</b><br />News involved the use of additional zeros tacked on to the end of numbers. &nbsp;<b><span class="Apple-style-span" style="font-weight: normal;">The <a href="http://www.recovery.gov/Pages/default.aspx">recovery.gov</a> web site has been somewhat underwhelming. &nbsp;<a href="http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=0003e0&amp;topic_id=1&amp;topic=">Edward Tufte's nomination</a> to&nbsp;<span class="Apple-style-span" style="font-family: 'times new roman', times, serif;">serve on the Recovery Independent Advisory Panel was a fascinating development. 
&nbsp;His emphasis on clear and truthful presentation of information could be a Good Thing.</span></span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b><br /><b>New Era of Data Journalism</b><br /><b><span class="Apple-style-span" style="font-weight: normal;">The World Bank has continued to provide <a href="http://data.worldbank.org/">more data</a> on economic and social topics. &nbsp;</span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;">A <a href="http://www.r-chart.com/2010/06/plotting-world-bank-data-with-r.html">couple</a> of <a href="http://www.r-chart.com/2010/09/new-world-bank-data-available.html">blog posts</a> covered this, and&nbsp;<b><span class="Apple-style-span" style="font-weight: normal;">an <a href="http://cran.r-project.org/web/packages/WDI/index.html">R package</a> is also available to access the World Bank Data API. &nbsp;There has been an increased refinement in <a href="http://www.guardian.co.uk/data">data journalism</a>&nbsp;as well as controversy surrounding WikiLeaks during 2010. &nbsp;</span></b></span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b></span></b><br /><b><b>Data Marketplace</b></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><a href="http://infochimps.com/">InfoChimps</a>&nbsp;is pioneering an online marketplace for buying and selling data. 
&nbsp;Seems that they have a plausible idea - they <a href="http://venturebeat.com/2010/11/07/infochimps-funding/">recently landed 1.2 million dollars</a> in funding.</span></b><br /><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b><br /><b>Benoit Mandelbrot</b><br /><b><span class="Apple-style-span" style="font-weight: normal;">Another noteworthy death this year that was not mentioned was the loss of the "Father of Fractals" -&nbsp;<a href="http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html">Benoit Mandelbrot</a>. &nbsp;</span></b><br /><div><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b></div><div><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1594521702624711592/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2011/01/ten-news-stories-of-2010-and-statistics.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1594521702624711592"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1594521702624711592"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2011/01/ten-news-stories-of-2010-and-statistics.html" title="Ten News Stories of 2010 - and the Statistics that Made Them"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>noreply@blogger.com</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-2226286964020013103</id>
<published>2010-12-31T14:17:00.000-08:00</published>
<updated>2010-12-31T14:17:25.314-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="R-Chart"/>
<title type="text">R-Chart: Year End Wrap Up</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s1600/happyNewYear.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="187" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s400/happyNewYear.jpg" width="400" /></a></div><br /><br /><iframe align="left" frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?t=rcha-20&amp;o=1&amp;p=8&amp;l=bpl&amp;asins=1420068725&amp;fc1=000000&amp;IS2=1&amp;lt1=_blank&amp;m=amazon&amp;lc1=0000FF&amp;bc1=000000&amp;bg1=FFFFFF&amp;f=ifr" style="align: left; height: 245px; padding-right: 10px; padding-top: 5px; width: 131px;"></iframe>Thanks to everyone who visited and commented here at R-Chart over the last year! &nbsp;Blogging has forced me to&nbsp;crystallize&nbsp;my thoughts and I hope others have benefited a bit from these meanderings. &nbsp;It is great to interact with the&nbsp;knowledgeable, educated and friendly folks in the R community. <br /><br />I make no claims to be an expert or authority on statistics, visualization, design or any of the myriad of other topics touched on over the past year. &nbsp;I appreciate all who have provided encouragement, suggestions and corrections. &nbsp;Unlike many of you more scientifically minded types who meticulously verify all conclusions before speaking, I tend to throw ideas out in the blog and make adjustments and corrections based upon feedback. &nbsp;This is really one of the great values of blogging - and so again, thank you for your responsiveness. 
&nbsp;It was unexpected and very helpful.<br /><br /><b>Lessons Learned</b><br />In case you blog or are thinking of blogging, I thought you might be interested in how things have worked here at R-Chart to this point.<br /><br /><b>Make Good Titles</b><br />It was interesting to find out which items were of most interest (based upon the number of hits per page). &nbsp;A great deal seems to be based upon the headline to the blog - never underestimate the value of a well-constructed-sound-byte of a title. &nbsp;This often dictates the future of a posting. &nbsp;Bad title = no response. &nbsp;I really never gave much thought to how important it is to construct a meaningful, attention grabbing title.<br /><br /><b>Blog Promotion</b><br />Promotion of each article also took more time than I expected. &nbsp;Tal over at <a href="http://www.r-bloggers.com/">R-Bloggers</a>&nbsp;really does the R community a service - bloggers who sign up have content aggregated automatically. &nbsp;If you want to draw additional readers you have to do a certain amount of footwork yourself. &nbsp;I get about 15% of total traffic to the site from search engines - which is kind of low. &nbsp;Most of the generic sites that I submitted the blog to didn't send any traffic. 
&nbsp;Content that was of specific interest to a given community ended up resulting in the most traffic.<br /><br />The top sites that have sent traffic this way are shown below.<br /><br /><br /><a href="http://www.reddit.com/">www.reddit.com</a> <span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp;15,218<br /><a href="http://www.google.com/">www.google.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>8,932<br /><a href="http://news.ycombinator.com/">news.ycombinator.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>7,211<br /><a href="http://www.r-bloggers.com/">www.r-bloggers.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>4,885<br /><a href="http://www.dzone.com/">www.dzone.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>3,682<br /><a href="http://habrahabr.ru/">habrahabr.ru</a><span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;1,167 &nbsp;(Hi to friends in Russia for this - the highest ranking non-English site)<br /><a href="http://twitter.com/">twitter.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;689<br /><a href="http://www.google.co.in/">www.google.co.in</a><span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;565<br /><a href="http://www.google.co.uk/">www.google.co.uk</a><span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;531<br /><a href="http://www.rubyflow.com/">www.rubyflow.com</a><span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;470<br /><br /><br /><b>R is International</b><br />I was really amazed at the international response - folks from 164 countries around the world hit the blog since its inception. 
&nbsp;Germany was the top non-English site in total visits and France was also well represented. <br /><br />This probably is of no surprise to many - R has been widely used in academic research and there are a relatively small number of highly specialized professionals around the world using R. &nbsp;It's obvious that the web reaches everywhere - it is not obvious who will end up visiting a given site.<br /><br /><b>Interest as Indicated by Traffic</b><br />A few other numbers of note:<br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">96,928&nbsp;<span class="Apple-tab-span" style="white-space: pre;"> </span>R-Chart Pageviews all time history as of 01/31/2010. &nbsp;</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">620<span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;Downloads of the <a href="http://www.r-chart.com/2010/07/free-r-chart-iphone-app.html">free R-Chart iPhone application</a></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">237<span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;Total days blogging at blogspot (as.Date('2010-12-31') - as.Date('2010-05-08'))</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">195 &nbsp;&nbsp;<span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;Days blog has lived at r-chart.com (as.Date('2010-12-31') - as.Date('2010-06-19'))</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">158<span class="Apple-tab-span" style="white-space: pre;"> </span>&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;Comments on this blog</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><br /></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 
0px;"><b>Advertising</b></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">Apologies to folks who are put off by the advertising. &nbsp;I had a goal to dip into this area a bit to come to offset costs and maybe buy a book or two. &nbsp;This may happen eventually...</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><br /></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">$ 42.89<span class="Apple-tab-span" style="white-space: pre;"> </span>AdSense Revenue</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">$ 13.46<span class="Apple-tab-span" style="white-space: pre;"> </span>Advertising Revenue through Amazon affiliates</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><br /></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><br /></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">Again - thanks to all - and have a Happy New Year</div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/2226286964020013103/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/12/r-chart-year-end-wrap-up.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2226286964020013103"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2226286964020013103"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/12/r-chart-year-end-wrap-up.html" title="R-Chart: Year End Wrap Up"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>noreply@blogger.com</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_FsLa1cMTCWU/TR5WOI8cQWI/AAAAAAAAAkU/MnZPvKNyVko/s72-c/happyNewYear.jpg" height="72" width="72"/>
<thr:total>1</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1042341025364158802</id>
<published>2010-12-08T04:00:00.000-08:00</published>
<updated>2010-12-08T09:52:10.277-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Google"/>
<title type="text">Google AI Challenge: Scores/Rank by Language</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s1600/googleai2010_facet_by_language.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s400/googleai2010_facet_by_language.png" width="400" /></a></div><div class="separator" style="clear: both; text-align: left;">A quick follow up to the <a href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html">previous post</a>: about the scores in the 2010 Google AI competition relative to programming language. &nbsp;The chart above makes each language visible and discrete - and the scales are the same.</div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">library(ggplot2)</span></span></div><div class="separator" style="clear: both; text-align: left;"></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df&lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)</span></span></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df$V7 &lt;- NULL</span></span></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">names(df)&lt;- c('rank', 
'username','country','organization','language','elo_score')</span></span></div><br /><div class="separator" style="clear: both; text-align: center;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span></span></div><div class="separator" style="clear: both; text-align: center;"></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">ggplot(data=df, aes(x=rank, y=elo_score, color=language)) +&nbsp;</span></span></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">+ geom_point(size=1) +&nbsp;</span></span></div><div class="separator" style="clear: both; text-align: left;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">+ facet_wrap(~ language) + opts(title='Google AI 2010: Score by Rank for each Language')</span></span></div><div style="text-align: left;"><br /></div><div style="text-align: left;">It is based upon a simple comparison of rank and score.</div><br /><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TP9t7pJsyfI/AAAAAAAAAj8/euJnhAeN2f8/s1600/googleai2010_score_by_rank.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TP9t7pJsyfI/AAAAAAAAAj8/euJnhAeN2f8/s400/googleai2010_score_by_rank.png" width="400" /></a></div><br /><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;">df&lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)</span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df$V7 &lt;- NULL</span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">names(df)&lt;- c('rank', 'username','country','organization','language','elo_score')</span></span><br /><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">ggplot(data=df, aes(x=rank, y=elo_score)) + geom_point(size=1) + opts(title='Google AI Score by Rank')</span></span><br /><br /><br />Another approach to viewing this information is a histogram by score (which ignores rank). &nbsp;With a binwidth of 100 (and ignoring low scores of people who signed up but who dropped out relatively early) a (nearly) bimodal distribution appears.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TP9vbsueChI/AAAAAAAAAkE/CXEGheJYM9g/s1600/googleai2010_histogram.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TP9vbsueChI/AAAAAAAAAkE/CXEGheJYM9g/s400/googleai2010_histogram.png" width="400" /></a></div><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">qplot(data=df, x=elo_score, geom='histogram', binwidth=100)</span></span><br /><br /><br />Any ideas about why this is not normal? &nbsp;Is there some aspect of ELO scoring that leads to this shape? &nbsp;Or are there different types of programmers represented?<br /><br />This can be broken down by language. 
&nbsp;To avoid difficulty distinguishing colors, the rainbow palette is used and a few languages are not reported (since they were not highly represented in the competition).<br /><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">library(sqldf)</span></span><br /><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df2=sqldf("select * from df where language not in ('Groovy','Scala','Go','OCaml')")</span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df2$language=factor(df2$language)</span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">qplot(data=df2, x=elo_score, fill=language, geom='histogram', binwidth=100) + scale_fill_manual(values=rainbow(12))&nbsp;</span></span><br /><br /><div><br /></div><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9wSRv87sI/AAAAAAAAAkI/9kP8BvDvDNQ/s1600/googleai2010_histogram_by_lang.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9wSRv87sI/AAAAAAAAAkI/9kP8BvDvDNQ/s320/googleai2010_histogram_by_lang.png" width="320" /></a></div>As mentioned in the&nbsp;<a href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html">previous post</a>, the data is available at GitHub - feel free to post some of your own visualizations of this data.</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1042341025364158802/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html#comment-form" title="5 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1042341025364158802"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1042341025364158802"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html" title="Google AI Challenge: Scores/Rank by Language"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>noreply@blogger.com</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_FsLa1cMTCWU/TP9u1mZfPjI/AAAAAAAAAkA/i7m8PEC4_Pk/s72-c/googleai2010_facet_by_language.png" height="72" width="72"/>
<thr:total>5</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-6168320883529187100</id>
<published>2010-12-02T16:18:00.000-08:00</published>
<updated>2010-12-08T04:03:40.307-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Ruby"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Google"/>
<title type="text">Google AI Challenge: Languages Used by the Best Programmers</title>
<content type="html"><b><span class="Apple-style-span" style="font-weight: normal;"></span></b><br /><b></b><br /><b><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s1600/program_language_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s400/program_language_density_plot.png" width="400" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div><br /></div><div><span class="Apple-style-span" style="font-weight: normal;">The </span><a href="http://www.zdnet.com/blog/burnette/its-youth-vs-experience-in-the-google-ai-challenge-and-the-kids-are-winning/2123?tag=mantle_skin;content"><span class="Apple-style-span" style="font-weight: normal;">Google AI Challenge</span></a><span class="Apple-style-span" style="font-weight: normal;"> recently wrapped up with a </span><a href="http://www.zdnet.com/blog/burnette/hungarian-lisp-developer-walks-away-with-google-ai-contest/2131"><span class="Apple-style-span" style="font-weight: normal;">Lisp developer from Hungary</span></a><span class="Apple-style-span" style="font-weight: normal;"> as the winner. &nbsp;The competition challenges contestants to create bots that push the limits of AI and game theory. &nbsp;These bots compete against one another, and a </span><a href="http://ai-contest.com/rankings.php"><span class="Apple-style-span" style="font-weight: normal;">complete ranking</span></a><span class="Apple-style-span" style="font-weight: normal;"> of competitors is available. 
&nbsp;The big story today is that the winner (</span><a href="http://quotenil.com/"><span class="Apple-style-span" style="font-weight: normal;">Gábor Melis</span></a><span class="Apple-style-span" style="font-weight: normal;">) used Lisp to beat out over 4000 other contestants around the world using a host of different programming languages. &nbsp;&nbsp;</span></div></b><br /><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><b><span class="Apple-style-span" style="font-weight: normal;"></span></b></div><b></b><br /><b><div><span class="Apple-style-span" style="font-weight: normal;">Paul Graham has stated that&nbsp;</span><a href="http://www.paulgraham.com/arcll1.html"><span class="Apple-style-span" style="font-weight: normal;">Java was designed for "average" programmers</span></a><span class="Apple-style-span" style="font-weight: normal;">&nbsp;while other languages (</span><a href="http://www.paulgraham.com/avg.html"><span class="Apple-style-span" style="font-weight: normal;">like Lisp</span></a><span class="Apple-style-span" style="font-weight: normal;">) are for good programmers. &nbsp;The fact that the winner of the competition wrote in Lisp seems to support this assertion. &nbsp;Or should we see Mr. 
Melis as an anomaly who happened to use Lisp for this task?</span></div></b><br /><br /><br /><b>Programming Languages Usage</b><br /><b><span class="Apple-style-span" style="font-weight: normal;"></span></b><br /><b></b><br /><b><div>Java, C++, Python and C# were heavily used overall.</div></b><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: x-small;">&nbsp;&nbsp; &nbsp; language count(*)</span><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">1 &nbsp; &nbsp; &nbsp; &nbsp;Java &nbsp; &nbsp; 1634</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">2 &nbsp; &nbsp; &nbsp; &nbsp; C++ &nbsp; &nbsp; 1232</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">3 &nbsp; &nbsp; &nbsp;Python &nbsp; &nbsp; &nbsp;948</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">4 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;C# &nbsp; &nbsp; &nbsp;485</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">5 &nbsp; &nbsp; &nbsp; &nbsp; PHP &nbsp; &nbsp; &nbsp; 80</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">6 &nbsp; &nbsp; &nbsp; &nbsp;Ruby &nbsp; &nbsp; &nbsp; 55</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">7 &nbsp; &nbsp; Haskell &nbsp; &nbsp; &nbsp; 
51</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">8 &nbsp; &nbsp; &nbsp; &nbsp;Perl &nbsp; &nbsp; &nbsp; 42</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">9 &nbsp; &nbsp; &nbsp; &nbsp;Lisp &nbsp; &nbsp; &nbsp; 33</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">10 Javascript &nbsp; &nbsp; &nbsp; 19</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">11 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;C &nbsp; &nbsp; &nbsp; 18</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">12 &nbsp; &nbsp; &nbsp;OCaml &nbsp; &nbsp; &nbsp; 12</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">13 &nbsp; &nbsp; &nbsp; &nbsp; Go &nbsp; &nbsp; &nbsp; &nbsp;6</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">14 &nbsp; &nbsp; &nbsp;Scala &nbsp; &nbsp; &nbsp; &nbsp;4</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">15 &nbsp; &nbsp; Groovy &nbsp; &nbsp; &nbsp; &nbsp;1</span></span><br /><div><br /></div><div><b>In the Top 200</b></div><div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: 
x-small;">&nbsp;&nbsp; &nbsp; language count(*)</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">1 &nbsp; &nbsp; &nbsp; &nbsp;Java &nbsp; &nbsp; &nbsp; 70</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">2 &nbsp; &nbsp; &nbsp; &nbsp; C++ &nbsp; &nbsp; &nbsp; 64</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">3 &nbsp; &nbsp; &nbsp;Python &nbsp; &nbsp; &nbsp; 34</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">4 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;C# &nbsp; &nbsp; &nbsp; 17</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">5 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; C &nbsp; &nbsp; &nbsp; &nbsp;4</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">6 &nbsp; &nbsp; Haskell &nbsp; &nbsp; &nbsp; &nbsp;3</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">7 &nbsp; &nbsp; &nbsp; &nbsp; PHP &nbsp; &nbsp; &nbsp; &nbsp;3</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">8 &nbsp; &nbsp; &nbsp; &nbsp;Ruby &nbsp; &nbsp; &nbsp; &nbsp;2</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"><span class="Apple-style-span" style="font-size: x-small;">9 &nbsp;Javascript &nbsp; &nbsp; &nbsp; &nbsp;1</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">10 &nbsp; &nbsp; &nbsp; Lisp &nbsp; &nbsp; &nbsp; &nbsp;1</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">11 &nbsp; &nbsp; &nbsp;OCaml &nbsp; &nbsp; &nbsp; &nbsp;1</span></span></div></div><br /><br /><b>Top 100</b><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">1 &nbsp; &nbsp; Java &nbsp; &nbsp; &nbsp; 33</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">2 &nbsp; &nbsp; &nbsp;C++ &nbsp; &nbsp; &nbsp; 32</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">3 &nbsp; Python &nbsp; &nbsp; &nbsp; 20</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">4 &nbsp; &nbsp; &nbsp; C# &nbsp; &nbsp; &nbsp; &nbsp;9</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">5 &nbsp; &nbsp; &nbsp; &nbsp;C &nbsp; &nbsp; &nbsp; &nbsp;3</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">6 &nbsp;Haskell &nbsp; &nbsp; &nbsp; &nbsp;1</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"><span class="Apple-style-span" style="font-size: x-small;">7 &nbsp; &nbsp; Lisp &nbsp; &nbsp; &nbsp; &nbsp;1</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">8 &nbsp; &nbsp;OCaml &nbsp; &nbsp; &nbsp; &nbsp;1</span></span><br /><div><br /></div><div><b>Top 10</b></div><div><div><span class="Apple-style-span" style="font-size: x-small;">&nbsp;</span><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">&nbsp;language count(*)</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">1 &nbsp; &nbsp; Java &nbsp; &nbsp; &nbsp; &nbsp;4</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">2 &nbsp; &nbsp; &nbsp;C++ &nbsp; &nbsp; &nbsp; &nbsp;3</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">3 &nbsp; &nbsp; &nbsp; C# &nbsp; &nbsp; &nbsp; &nbsp;2</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">4 &nbsp; &nbsp; Lisp &nbsp; &nbsp; &nbsp; &nbsp;1</span></span></div><div><br /></div></div><br />The plot above is a bit difficult to discern due to the number of languages represented (and similarity in colors). 
&nbsp;So here is a breakdown by language.<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgyBXF3PhI/AAAAAAAAAjg/M6v-8WEvv98/s1600/lisp_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgyBXF3PhI/AAAAAAAAAjg/M6v-8WEvv98/s400/lisp_density_plot.png" width="400" /></a></div>Lisp does appear to be skewed towards higher ranking. &nbsp;But even more striking are the C hippies:<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgyKSc-rvI/AAAAAAAAAjk/5Si6vyFPEvQ/s1600/c_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgyKSc-rvI/AAAAAAAAAjk/5Si6vyFPEvQ/s400/c_density_plot.png" width="400" /></a></div>The functional crowd represented with Haskell also ranked on the higher end:<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPg0CBK3W5I/AAAAAAAAAj4/q3iEbQvyMhc/s1600/haskell_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPg0CBK3W5I/AAAAAAAAAj4/q3iEbQvyMhc/s400/haskell_density_plot.png" width="400" /></a></div><br /><br />How about Java? &nbsp;There is a trend towards the average - but a significantly larger number of entrants used Java. 
&nbsp;It also is a language taught in many colleges, and might reflect greater student participation in these languages (although MIT did focus on Lisp back in the day...).<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgybPKcEbI/AAAAAAAAAjs/2THlgOgE-mo/s1600/java_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgybPKcEbI/AAAAAAAAAjs/2THlgOgE-mo/s400/java_density_plot.png" width="400" /></a></div>How about representatives from Microsoft? &nbsp;Einstein and Elvis showed up - Mort was not interested.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgy6h6N5EI/AAAAAAAAAj0/8PXgOSzN8ck/s1600/csharp_density_plot.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgy6h6N5EI/AAAAAAAAAj0/8PXgOSzN8ck/s400/csharp_density_plot.png" width="400" /></a></div><br />I can post charts of other languages if anyone asks - otherwise, <a href="https://github.com/ezgraphs/R-Programs/tree/master/google-ai-2010/">download the files for yourself</a> and draw your own conclusions. &nbsp;And congratulations to&nbsp;<b></b><br /><b></b><br /><b><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;"><a href="http://quotenil.com/">Gábor Melis</a>&nbsp;- I am again feeling the inspiration to delve into the mysteries of Lisp and meander among mountains of parentheses...</span></div></b><br /><br /><br /><br /><b>Methodology Used</b><br />No need to proceed further unless you are interested in how the results listed above were derived. <br /><br />Basically, I used Ruby to scrape the results from the <a href="http://ai-contest.com/rankings.php">Google AI Rankings site</a>. 
&nbsp;The results were read into R, and the ggplot2 and sqldf libraries were used to analyze the results.<br /><br /><b>Get the Data into R</b><br />So to find out more...I whipped up a&nbsp;<a href="https://github.com/ezgraphs/R-Programs/blob/master/google-ai-2010/googleAI2010.rb">ruby script</a>&nbsp;to create a <a href="https://github.com/ezgraphs/R-Programs/blob/master/google-ai-2010/googleAI2010.csv">delimited file</a> from the 47 page listing online. &nbsp;(Feel free to get these from their GitHub location and do some additional validation/analysis of your own). &nbsp; Read this file into R:<br /><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">df&lt;- read.csv('googleAI2010.csv',sep=';',header=FALSE)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">df$V7 &lt;- NULL</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">names(df)&lt;- c('rank', 'username','country','organization','language','elo_score')</span></span><br /><br /><br /><b>Sanity Check</b><br />Most of this work can be done in idiomatic R (which has some significant Lisp influences) - which might be a better way to honor the winner. &nbsp;However, I find myself using sqlite more and more these days - particularly in mobile development. &nbsp;So I used the sqldf library which uses this database behind the scenes.<br /><br /><a href="http://ai-contest.com/country_rankings.php">Country rankings</a> are available online, and the following emulates these results. 
&nbsp;Specifically, the number of entrants in the top 200 ranked contestants from each country can be derived as follows:<br /><b></b><br /><b></b><br /><b><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><br /></span></div></b><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">library('sqldf')</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;"><b></b></span></span><br /><b></b><br /><b><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">top200=df[df$rank &lt;= 200,]</span></span></span></div></b><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;"><b></b></span></span><br /><b><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: x-small;">sqldf('select country, count(*) from top200 group by country order by 2 desc')</span></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><br /></span><br /><span class="Apple-style-span" style="font-weight: normal;"><a href="http://ai-contest.com/organization_rankings.php">Organization rankings</a>&nbsp;are similar, representing the top organizations within the top 100. 
&nbsp;There are some anomalies here: the highest-ranking "Other" is not shown in the online version for obvious reasons, and most of these have only one entrant in the top 100 and are listed in an arbitrary manner. &nbsp;However, the results are otherwise the same in R.</span><br /><span class="Apple-style-span" style="font-weight: normal;"><br /></span><br /><span class="Apple-style-span" style="font-weight: normal;"><b><b></b></b></span><br /><b><b></b></b></b><br /><b><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;">t<span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">op100=df[df$rank &lt;= 100,]</span></span></span></div></b><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">sqldf('select organization, count(*) from top100 group by organization order by 2 desc')</span></span></span></div><div><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span></span></span></div></b><br /><span class="Apple-style-span" style="font-weight: normal;"><br /></span><br />R Code<br /><span class="Apple-style-span" style="font-weight: normal;">The following are additional snippets of R code used to generate the results above.</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" 
style="font-size: x-small; font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><span class="Apple-style-span" style="font-size: small;"><br /></span></span></span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"># Language Usage</span></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><b></b></span><br /><b><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">sqldf('select language, count(*) from df group by language order by 2 desc')</span></span></span></div></b><br /><span class="Apple-style-span" style="font-weight: normal;"></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;">sqldf('select language, count(*) from top200 group by language order by 2 desc')</span></span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;">sqldf('select language, count(*) from top100 group by language order by 2 desc')</span></span></span><br /><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;"><span class="Apple-style-span" style="font-weight: normal;">top10=df[df$rank &lt;= 10,]</span></span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;">sqldf('select language, count(*) from top10 group by language order by 2 desc')</span></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><br /></span><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;">&nbsp;If you fiddle enough with the bucket size for histograms, you might be able to draw some conclusions... but the density plot seemed like a nicer option. &nbsp;</span></div><div><span class="Apple-style-span" style="font-weight: normal;"><br /></span></div><div><span class="Apple-style-span" style="font-weight: normal;"><b></b></span><br /><span class="Apple-style-span" style="font-weight: normal;"><b><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: x-small;">library('ggplot2')</span></span></div></b></span></div><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"># Substitute your favorite language of those available for Lisp below</span></span></span><br /><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;">qplot(data=df[df$language=='Lisp',], x=rank, geom='histogram', binwidth=1000) + 
opts(title='Lisp')&nbsp;</span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgoWQmQXrI/AAAAAAAAAjI/9hHaM-lAQ-A/s1600/lisp_ranking_histogram.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TPgoWQmQXrI/AAAAAAAAAjI/9hHaM-lAQ-A/s320/lisp_ranking_histogram.png" style="cursor: move;" width="320" /></a></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"># The density plot at the top of this posting:</span></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">ggplot(data=df, aes(rank, fill=language)) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">&nbsp;&nbsp;geom_density(alpha = 0.2) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><b><span class="Apple-style-span" style="font-size: x-small;"><span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"></span></span></b></span></span><br /><b><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">&nbsp;xlim(0,5000) +</span></span></div></span></b><b><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span></b><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">&nbsp;&nbsp;opts(title='2010 Google AI Challenge Rankings')</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><br /></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">ggsave('program_language_density_plot.png')</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><br /></span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"># Breakdown by language:</span></span><br /><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;">ggplot(data=df[df$language=='Scala',], aes(rank, fill=language)) + geom_density(alpha = 0.2) + xlim(0,5000) + opts(title='Scala')&nbsp;</span></span></span></b><br /><div><b><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-size: x-small;"><br 
/></span></span></span></b></div><div><b><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"><b>Update:</b> &nbsp;I have been keeping up with the comments - and sketched out some other ways of looking at the data in </span><a href="http://www.r-chart.com/2010/12/google-ai-challenge-scoresrank-by.html"><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">another post</span></a><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">.</span></span></b></div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/6168320883529187100/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html#comment-form" title="6 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6168320883529187100"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6168320883529187100"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/12/google-ai-challenge-languages-used-by.html" title="Google AI Challenge: Languages Used by the Best Programmers"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_FsLa1cMTCWU/TPgxWY_QNZI/AAAAAAAAAjc/ORVJjtoDBvg/s72-c/program_language_density_plot.png" height="72" width="72"/>
<thr:total>6</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1179246055042215866</id>
<published>2010-11-10T16:24:00.000-08:00</published>
<updated>2010-11-10T16:24:08.016-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Financial"/>
<title type="text">Mortgage Calculator (and Amortization Charts) with R</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s1600/mortgage_calculator.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="276" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s320/mortgage_calculator.PNG" width="320" /></a></div><br />Mortgage rates have been at historic lows recently. &nbsp;The rates are posted <a href="http://www.bankrate.com/">various places</a> online along with simple mortgage calculators. &nbsp;Such calculators illustrate the payment schedule for a mortgage based upon selected terms. But with less than a dozen lines of R code, you can do a far more sophisticated analysis.<br /><br /><b>Mortgage Calculation Function</b><br />Rather than reinvent the wheel, you can work with <a href="http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/mortgage.R">this nice R function</a>&nbsp;by&nbsp;Thomas Girke (Associate Professor of Bioinformatics&nbsp;over at UC Riverside). 
&nbsp;At the R prompt, you can grab it from its home online by calling source:<br /><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/mortgage.R")</span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><br /></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><br /></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;">This loads the function and outputs a helpful description of the function:</span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><br /></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">The monthly mortgage payments and amortization rates can be calculted with the mortgage() function like this:&nbsp;</span></span><br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;mortgage(P=500000, I=6, L=30, amort=T, plotData=T)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;P = principal (loan amount)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier 
New', Courier, monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;I = annual interest rate</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;L = length of the loan in years&nbsp;</span></span><br /><div><br /></div><div>So keep in mind that there is a huge amount of R code available online:&nbsp;</div><div><ul><li><a href="http://cran.r-project.org/web/packages/">CRAN</a>&nbsp;</li><li><a href="https://github.com/languages/R">Github</a>&nbsp;</li><li><a href="http://code.google.com/hosting/search?q=label:R">Google Code</a>&nbsp;</li></ul></div><div>are just the beginning. &nbsp;</div><div><br /></div><div><br /></div><div><b>Instant R Graphical User Interfaces</b></div><br />Rather than simply calling the function directly, you can quickly construct a GUI input widget using the fgui library.<br /><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">library(fgui)</span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;">gui(mortgage)</span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><br /></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;">With this trivial invocation, a window pops up.</span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span 
class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"><br /></span></span></span><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNssTo9JGWI/AAAAAAAAAhU/L5_IfLDdJi4/s1600/mortgage_fgui.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="186" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNssTo9JGWI/AAAAAAAAAhU/L5_IfLDdJi4/s400/mortgage_fgui.png" width="400" /></a></div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;">Not terribly fancy, but about the simplest way you will ever be able to construct a GUI! &nbsp;In this case a mortgage amount of $90,000 for 10 years at 3.75% is illustrated. 
&nbsp;</span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;">After entering these values, click OK to actually call the function. &nbsp;This results in a &nbsp;good deal of interesting output. &nbsp;Close the pop up window and look at the R Console:</span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"></span></span></span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: small;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Monthly payment: $900.5512 (stored in monthPay)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Total cost: $108066.1</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span></span><br /><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">As indicated in this message, an R object named monthPay contains the amount of the monthly payment and can be used in subsequent R commands and calculations. &nbsp;You also are greeted with a graph illustrating annual interest and payments as a stacked bar chart.</span><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNsuNb_5WdI/AAAAAAAAAhY/Y_BKSSSv9Yg/s1600/mortgage_annual_interest_and_principal.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNsuNb_5WdI/AAAAAAAAAhY/Y_BKSSSv9Yg/s400/mortgage_annual_interest_and_principal.png" width="400" /></a></div><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">Plenty of useful information! &nbsp;But that's not all...</span><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"><br /></span><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"><b>Beyond the Basics</b></span><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">You might have noticed a number of messages regarding data stored in R objects. &nbsp;This is where the power of R exceeds that of any standard mortgage calculator. 
&nbsp;These objects can serve as input to other calculations or charting operations.</span><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"><br /></span><br /><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">The aDFmonth object contains amortization data for each month, while aDFyear contains annual information. In the following example, a new data frame is created from the monthly data that does not include the amortization information and plot it using ggplot2. &nbsp;(The amortization data is a&nbsp;significantly&nbsp;different scale and better viewed independently).</span><br /><br /><span class="Apple-style-span" style="font-size: 13px;">library(ggplot2)</span><br /><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">DF=melt(aDFmonth[-1], id.vars='Year')</span><br /><br /><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">ggplot(DF, aes(x=Year,y=value, group=variable)) + geom_line() + facet_wrap(~ variable, ncol=1)</span></div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNsvbu2PGzI/AAAAAAAAAhc/hgsBQnNFeng/s1600/mortgage_payment_principal_interest.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNsvbu2PGzI/AAAAAAAAAhc/hgsBQnNFeng/s400/mortgage_payment_principal_interest.png" width="400" /></a></div><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;">You can quickly manipulate the data frame to view amortization information instead. 
&nbsp;Use the exact same ggplot call (though the facet_wrap is removed below as unnecessary for a single variable) &nbsp;to create a chart scaled to fit the values relevant to the amortization.</span></span></div><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><br /></span></div><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">DF=melt(aDFmonth[c(1,5)], id.vars='Year')</span></div><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;">ggplot(DF, aes(x=Year,y=value, group=variable))+ geom_line()&nbsp;</span></div><div><span class="Apple-style-span" style="font-family: courier, monaco, monospace, sans-serif; font-size: 13px;"><br /></span></div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNswYk5iz7I/AAAAAAAAAhg/rJEFKPoWUwk/s1600/mortgage_amortization.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNswYk5iz7I/AAAAAAAAAhg/rJEFKPoWUwk/s400/mortgage_amortization.png" width="400" /></a></div><div><br /></div><div><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">The limits of calculations and visualizations available in a web calculator or Excel are reached pretty quickly. &nbsp;R provides the means to create relatively full featured solutions in only a few lines of code.&nbsp;</span></div></span></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1179246055042215866/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/11/mortgage-calculator-and-amortization.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1179246055042215866"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1179246055042215866"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/11/mortgage-calculator-and-amortization.html" title="Mortgage Calculator (and Amortization Charts) with R"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNs3fPWMZaI/AAAAAAAAAhk/OM5HulgRhZM/s72-c/mortgage_calculator.PNG" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-8351171166433109175</id>
<published>2010-11-09T16:50:00.000-08:00</published>
<updated>2010-11-11T03:44:56.360-08:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Economics"/>
<title type="text">Don't be a Turkey</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s1600/turkey.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="298" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s400/turkey.png" width="400" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"></div><div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"><span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: small;">'Indeed, I am moving on: my new project is about methods on how to domesticate the unknown, exploit randomness, figure out&nbsp;<i>how to live in a world we don't understand very well</i>. 
While most human thought (particularly since the enlightenment) has focused us on how to turn knowledge into decisions, my new mission is to build methods to turn lack of information, lack of understanding, and lack of "knowledge" into decisions—how, as we will see, not to be a "turkey".'</span></div><div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"><span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: small;">-&nbsp;<a href="http://www.edge.org/3rd_culture/taleb08/taleb08_index.html">Nassim Nicholas Taleb</a></span></div><br /><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: left;">With Thanksgiving on the way, an economic lesson provided by a turkey's statistical department seems appropriate. &nbsp; &nbsp;Our turkey - let's call him auRthur - like most turkeys has a statistical department at his disposal. &nbsp;His&nbsp;department is in fact tracking an index - the Turkey Welfare Index which is a reflection of how much the human race cares about auRthur. &nbsp;Notice the relatively positive trend... until &lt;whack&gt; Thanksgiving Day...</div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TNnfcvvyMWI/AAAAAAAAAhM/1T860Uju4n8/s1600/TurkeyWelfareIndex.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TNnfcvvyMWI/AAAAAAAAAhM/1T860Uju4n8/s400/TurkeyWelfareIndex.png" width="400" /></a></div>Evidently, our auRthur's statistical department utilized a model that had some flaws - "past performance is not necessarily a predictor of future returns". 
&nbsp; This is because the harvesting of the turkey is a "rare event." &nbsp;Rare (unprecedented) events are difficult to predict. &nbsp;The story is not terribly amusing to turkeys to begin with - but becomes less amusing to humans when understood as a metaphor of the financial meltdown and statistical modeling in use by banking institutions. &nbsp;Essentially, banking institutions assumed a huge amount of risk because a catastrophic meltdown was simply outside the realm of consideration. &nbsp;It was not represented in most of the models in use.<br /><br />A great and vivid illustration. &nbsp;See <a href="http://www.edge.org/3rd_culture/taleb08/taleb08_index.html">Nassim Nicholas Taleb's essay</a> where this chart and illustration&nbsp;originally&nbsp;appeared at <a href="http://edge.org/">edge.org</a>. &nbsp;This article discusses the limits of statistical thinking and is a good springboard to other writings by Taleb - who was a&nbsp;practitioner&nbsp;of risk as he ran a hedge fund for a number of years and saw many of the practices in the financial industry up close and personal.<br /><br /><br />The chart above was created using R and ggplot2. 
&nbsp;The data frame named DF was populated with data related to the Turkey Welfare Index.<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; DF</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; TWI Day color</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1 &nbsp; 14 &nbsp; 1 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">2 &nbsp; 15 &nbsp; 2 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">3 &nbsp; 16 &nbsp; 3 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">4 &nbsp; 17 &nbsp; 4 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">5 &nbsp; 18 &nbsp; 5 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">6 &nbsp; 19 &nbsp; 6 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">7 &nbsp; 20 &nbsp; 7 black</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">8 -100 &nbsp; 8 &nbsp; red</span></span><br /><span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span><br /><span class="Apple-style-span" style="font-family: inherit;">UPDATE: &nbsp;This can be entered in a few different ways. &nbsp;One is through a grid (which requires that you specify the Day as a factor).</span><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;DF=edit(data.frame())</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;DF$Day=factor(DF$Day)</span></span><br /><span class="Apple-style-span" style="font-family: inherit;"><br /></span><br /><span class="Apple-style-span" style="font-family: inherit;">Plotted using ggplot2:</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">library(ggplot2)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">ggplot(data=DF, aes(x=Day, y=TWI, fill=color)) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;geom_bar() +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span 
class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;scale_fill_manual(value= c("black", "red")) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;theme_bw() + scale_x_discrete(breaks = NA) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;opts(legend.position='none', axis.title.x=theme_blank(),&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp;axis.title.y=theme_blank(),&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; title='Turkey Welfare Index')</span></span><br /><div><br /></div><div>This included a couple of somewhat unusual settings to shut off labels and axes that results in the simple "plain" appearance you see above.</div><div><br /></div><div>So - Happy Thanksgiving - understand statistics and don't be a turkey...</div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/8351171166433109175/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/11/dont-be-turkey.html#comment-form" title="2 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/8351171166433109175"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/8351171166433109175"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/11/dont-be-turkey.html" title="Don't be a Turkey"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_FsLa1cMTCWU/TNnmxtYGe7I/AAAAAAAAAhQ/JRqsx6Sx0z4/s72-c/turkey.png" height="72" width="72"/>
<thr:total>2</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-6556038199488581490</id>
<published>2010-11-03T17:16:00.000-07:00</published>
<updated>2010-11-03T17:16:48.628-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="iPhone"/>
<title type="text">iPhone App Store Acceptance Time / Download Results</title>
<content type="html"><table><tbody><tr><td><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s1600/r-iphone.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="200" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s200/r-iphone.png" width="146" /></a></div></td> <td>&nbsp;&nbsp;</td> <td><br />Complaints about the iPhone App Store are not uncommon among developers. &nbsp;The submission process is frustrating at best, you can expect arbitrary rejections, and Apple's policies have not always been particularly open or welcoming. &nbsp;If you make it through the process and get an app accepted, it can be essentially buried where it will remain unused unless you dedicate significant energy to marketing it. </td></tr></tbody></table><br />And so I figured I would do a small scale experiment to check App Store response in these areas (and App Store user behavior)....and post the results here in the hopes that developers planning to write iPhone apps would benefit from my experience.<br /><br /><b>Cheaper by the (Half) Dozen</b><br />Having a limited amount of time available, and appreciating the value of quick iterations in development or in any business process, I limited myself to submitting a few apps with similar functionality and different target audiences. &nbsp;What follows is my findings (with a bit of R code used for analysis). <br /><br />Six free iPhone apps were submitted for publication during a 21 day period between &nbsp;July 23,&nbsp;2010 and &nbsp;August 13, 2010. 
&nbsp;Four apps were accepted and two were rejected.<br /><br />The four apps that were accepted:<br /><br /><ul><li><a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8"><span class="Apple-style-span" style="font-family: inherit;">R-Chart</span></a><span class="Apple-style-span" style="font-family: inherit;">&nbsp;(R Programming Community News) has been available 96 days and averaged 4.44 downloads per day. &nbsp;(<a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8">Download it</a> if you want to keep up with this blog and/or R community news).</span></li><li><span class="Apple-style-span" style="font-family: inherit;"><a href="http://itunes.apple.com/us/app/frb/id386008821?mt=8"><span class="Apple-style-span" style="font-family: inherit;">FRB</span></a><span class="Apple-style-span" style="font-family: inherit;">&nbsp;(U.S. Federal Reserve Board News) has been available 82 days and averaged 4.15 downloads per day.</span></span></li><li><span class="Apple-style-span" style="font-family: inherit;"><span class="Apple-style-span" style="font-family: inherit;"><a href="http://itunes.apple.com/us/app/duq-news/id384506820?mt=8"><span class="Apple-style-span" style="font-family: inherit;">Duq News</span></a><span class="Apple-style-span" style="font-family: inherit;">&nbsp;(Duquesne University News) has been available 92 days and averaged 2.52 downloads per day.</span></span></span></li><li><span class="Apple-style-span" style="font-family: inherit;"><span class="Apple-style-span" style="font-family: inherit;"><span class="Apple-style-span" style="font-family: inherit;"><a href="http://itunes.apple.com/us/app/visit-the-lehigh-valley/id384370305?mt=8"><span class="Apple-style-span" style="font-family: inherit;">Visit the Lehigh Valley</span></a><span class="Apple-style-span" style="font-family: inherit;">&nbsp;(Lehigh Valley Tourism Info) has been available 93 &nbsp;days and averaged 1.31 downloads per day.</span></span></span></span></li></ul><br 
/>Two apps were rejected &nbsp; <br /><br /><ul><li>&nbsp;DeSales U (DeSales University News)</li><li>&nbsp;Blender Buzz (Blender Software Blog/News )</li></ul><br /><div><b>App Functionality and Subject Area</b></div><div>All apps contained essentially equivalent functionality, but differed by subject area and graphic and styling qualities. &nbsp;The "<a href="http://itunes.apple.com/us/app/r-chart/id384073723?mt=8">R-Chart</a>" and "Blender Buzz" apps reference resources of interest to software users and were intended to promote <a href="http://www.r-chart.com/">this blog</a> and the <a href="http://blenderbuzz.blogspot.com/">Blender Buzz blog</a>. &nbsp;They are topical and not limited by locale or institution. &nbsp;"<a href="http://itunes.apple.com/us/app/duq-news/id384506820?mt=8">Duq News</a>" and "DeSales U" provide news from <a href="http://www.desales.edu/">Duquesne</a> and <a href="http://www.desales.edu/">DeSales Universities</a>. &nbsp;"<a href="http://itunes.apple.com/us/app/visit-the-lehigh-valley/id384370305?mt=8">Visit the Lehigh Valley</a>" provides information about places and events for visitors to Eastern Pennsylvania. The "<a href="http://itunes.apple.com/us/app/frb/id386008821?mt=8">FRB</a>" app provides latest publicly available news from the U.S. Federal Reserve Board.</div><br /><b>Review Process and Time</b><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">It took between 7 and 9 days for the App Store to review an app and either accept or reject it. &nbsp;It does not appear that subject area of the app contributed to its acceptance. &nbsp;Both R-Chart and Blender Buzz were directed at programming communities - one was accepted and the other was rejected. &nbsp;Likewise, one of the University apps was accepted, the other was rejected. &nbsp;Each app was submitted once only. 
&nbsp;None were resubmitted after initial rejection.</div><div class="separator" style="clear: both; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHtcGxrSgI/AAAAAAAAAgs/pW3a-E1g58g/s1600/processing_time.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHtcGxrSgI/AAAAAAAAAgs/pW3a-E1g58g/s400/processing_time.png" style="cursor: move;" width="400" /></a></div><div><br /></div><div><b>Download Counts</b></div><div>A total of 1120 have been downloaded. &nbsp;As noted above, the result is between 1 and 4 downloads per day.</div><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHqECZOZmI/AAAAAAAAAgo/iBLdSIV-WGg/s1600/app_downloads.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TNHqECZOZmI/AAAAAAAAAgo/iBLdSIV-WGg/s400/app_downloads.png" width="400" /></a></div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><b><u><br /></u></b></span></span><br /><div>I have not expended much effort in promotion. &nbsp;I tweeted about the R-Chart App and mentioned it here on this blog but never promoted any of the others. &nbsp;So the downloads were the result of folks searching for an app that was of interest to them.</div><br /><br /><b>Conclusion...</b><br />So getting an app accepted by the app store is not an insurmountable process - but does require time and planning. &nbsp;It is not an activity that will just take care of itself. &nbsp;And because of&nbsp;inconsistency in the process, you would do better to allot a bit of extra time for the app store process. 
&nbsp;And unless your app fills a rather unique niche, you will need to do marketing in the same way that you would for a web site or any other resource.<br /><br /><b>R Code used in the Analysis is Below</b><br /><br /><br /><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; # Read in the data</span></span></div><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; df = read.csv('app_stats.txt')</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; df</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;App Downloads Submitted_Date Response_Date Response</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; R-Chart &nbsp; &nbsp; &nbsp; 426 &nbsp; &nbsp; 2010-07-23 &nbsp; &nbsp;2010-07-30 Accepted</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">2 &nbsp; 
&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; FRB &nbsp; &nbsp; &nbsp; 340 &nbsp; &nbsp; 2010-08-04 &nbsp; &nbsp;2010-08-13 Accepted</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">3 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Duq News &nbsp; &nbsp; &nbsp; 232 &nbsp; &nbsp; 2010-07-26 &nbsp; &nbsp;2010-08-03 Accepted</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">4 Visit the Lehigh Valley &nbsp; &nbsp; &nbsp; 122 &nbsp; &nbsp; 2010-07-25 &nbsp; &nbsp;2010-08-02 Accepted</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">5 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; DeSales U &nbsp; &nbsp; &nbsp; &nbsp; 0 &nbsp; &nbsp; 2010-08-02 &nbsp; &nbsp;2010-08-09 Rejected</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">6 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;Blender Buzz &nbsp; &nbsp; &nbsp; &nbsp; 0 &nbsp; &nbsp; 2010-07-26 &nbsp; &nbsp;2010-08-03 Rejected</span></span></div><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span 
class="Apple-style-span" style="font-size: small;">&gt; # Get the total downloads</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; sum(df$Downloads)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">&gt; # Do a plot of downloads</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">&gt; ggplot(data=df, aes(x=App, y=Downloads, fill=Response))&nbsp;</span></div></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;+ geom_bar() + coord_flip() + theme_bw()</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; ggsave('app_downloads.png')</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;</span></span><br /><span 
class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; ggplot(data=df, aes(x=App, y=as.numeric(Processing_Time), fill=Response)) + geom_bar() + coord_flip() + theme_bw()&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; ggsave('processing_time.png')</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; # Cast the date columns as such</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman'; font-size: medium;"></span></span></span></div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 
0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; df$Submitted_Date=as.Date(df$Submitted_Date)</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; df$Response_Date=as.Date(df$Response_Date)</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; # Find out total dates each application was on the market</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; as.Date('2010-11-03') - min(df$Submitted_Date)</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; # Determine total number of 
days apps were being submitted for review</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">&gt; max(df$Response_Date) - min(df$Submitted_Date)</span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">&gt;</span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">&gt; # Processing time at the app store&nbsp;</span></div></span><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"></span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; df$Processing_Time = df$Response_Date - df$Submitted_Date</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt;</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; # Time each app has been available for download</span></span><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; df$days_available = as.Date('2010-11-03') - df$Response_Date</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt;&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 
0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; # Downloads per day</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; df$downloads_per_day = df$Downloads/as.numeric(df$days_available)</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt; # Limit view to selected columns</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&gt;&nbsp;df[c(1,7,8)]</span></span></div><br /><br /><div></div><div><br /></div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/6556038199488581490/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/11/iphone-app-store-acceptance-time.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6556038199488581490"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6556038199488581490"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/11/iphone-app-store-acceptance-time.html" title="iPhone App Store Acceptance Time / Download Results"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_FsLa1cMTCWU/TNH66yq20fI/AAAAAAAAAgw/6pfNg0qbn8c/s72-c/r-iphone.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-4939968277579407625</id>
<published>2010-10-18T19:20:00.000-07:00</published>
<updated>2010-10-18T19:20:16.398-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<title type="text">Hadley on a Postage Stamp?</title>
<content type="html"><img src="http://imgur.com/DrIlR.png" /><br /><br />Yet another reason to check out the <a href="http://github.com/hadley/ggplot2/wiki/Crime-in-Downtown-Houston,-Texas-:-Combining-ggplot2-and-Google-Maps">ggplot2 wiki!</a></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/4939968277579407625/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/hadley-on-postage-stamp.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/4939968277579407625"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/4939968277579407625"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/hadley-on-postage-stamp.html" title="Hadley on a Postage Stamp?"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-7793013875051299367</id>
<published>2010-10-18T15:12:00.000-07:00</published>
<updated>2010-10-18T15:14:42.968-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="quantmod"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Cramer"/>
<title type="text">Cramer's Stock Pick Recommendations Analyzed (Part II)</title>
<content type="html"><table><tbody><tr><td><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s1600/R_With_Jim_Cramer2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><br /><img border="0" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s1600/R_With_Jim_Cramer2.png" /></a></td><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td><td>This is the second post (previous one<a href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html"> here</a>) that provides an analysis of Cramer's stock recommendations based upon the <a href="http://www.thestreet.com/mad-money/index.html">Mad Money Stock Screener</a> as of 10/15/2010.</td></tr></tbody></table><br /><b>Recommendations by Segment</b><br />As mentioned in the <a href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html">previous post</a>, recommendations are referenced either by a number below or by name.<br /><b><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"># &nbsp; Description</span></span></b><br /><b> <span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">5 &nbsp; Buy</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">4 &nbsp; Positive</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">3 &nbsp; Hold</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">2 &nbsp; Negative</span></span><br /><span class="Apple-style-span" style="font-weight: normal;"><span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;">1 &nbsp; Sell</span></span></b><br /><b><br /></b><br />Referencing calls by number is used to provide average scores and to plot results. &nbsp;Also as noted previously, not all segments of the show are available through the Stock Screener.<br /><br />Calls made during the Lightning Round and Mail Bag have an average of less than 4 (positive). &nbsp;Calls driven by questions by the audience have a lower average, while those in interviews are higher.<br /><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><b>Segment &nbsp; &nbsp; &nbsp; &nbsp; Average Call</b></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Discussed &nbsp; &nbsp; &nbsp; 4.387156</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Featured &nbsp; &nbsp; &nbsp; &nbsp;4.351351</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Interview &nbsp; &nbsp; &nbsp; 4.895954</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Lighting Round &nbsp;3.764465</span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">Mail Bag &nbsp; &nbsp; &nbsp; &nbsp;3.605691</span><br /><div><br /></div><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy75miX3cI/AAAAAAAAAd8/1XKHJUZ-uqM/s1600/cramer_average_call_by_segment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy75miX3cI/AAAAAAAAAd8/1XKHJUZ-uqM/s400/cramer_average_call_by_segment.png" width="400" /></a></div><br /><b>Calls Trail the Market</b><br /><br />This is a bit more speculative a representation. 
&nbsp;The S &amp; P and Dow Jones Industrial Average are used in this comparison. &nbsp;The call (on a scale of 1 to 5) is multiplied by a factor so that a smoothed conditional mean line is generated on the chart. &nbsp;The factor is arbitrary; it just makes the line fit on the chart in a reasonable location, so the directionality of the call line is relevant - not the degree. <br /><br />The S &amp; P appears as the red line and the average of the calls for each day (times a factor of 3000) appears in blue.<br /><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy-6-SnKLI/AAAAAAAAAeE/Z9aFUrdv1xM/s1600/s_and_p_calls.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy-6-SnKLI/AAAAAAAAAeE/Z9aFUrdv1xM/s400/s_and_p_calls.png" width="400" /></a></div>The DJI is multiplied by a factor of 3000 in the chart below.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy_sBcpCoI/AAAAAAAAAeI/BO62OLv610g/s1600/dji_calls.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLy_sBcpCoI/AAAAAAAAAeI/BO62OLv610g/s400/dji_calls.png" width="400" /></a></div><br />Although it is not completely clear, it appears that stock picks tend to trail the market movement. &nbsp;For this reason they generally sound plausible. &nbsp;Calls tend to be more pessimistic at the time the market has been moving down.<br /><br /><b>5 of Cramer's Favorites</b><br />A few of the stocks that Cramer has recommended in the last year qualify in a special way as "favorites." 
&nbsp;They are the stocks that appear the most times in the data with only a buy recommendation.<br /><b><b></b></b><br /><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;"><br /></span></div></div></b></b><br /><b><b></b></b><br /><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">Wynn Resorts (WYNN) had a range of $27.00 between the lowest and highest 19 buy recommendations.</span></div></div></b></b><br /><b><b><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCmj_6l0I/AAAAAAAAAeM/c21g-67sr8U/s1600/jim_cramer_WYNN.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCmj_6l0I/AAAAAAAAAeM/c21g-67sr8U/s400/jim_cramer_WYNN.png" width="400" /></a></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp;&nbsp;</span></div></b><br /><span class="Apple-style-span" style="font-weight: normal;"><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">Weatherford Int'l (WFT)&nbsp;<b><b><b><b></b></b></b></b></span></div><span class="Apple-style-span" style="font-weight: normal;"><b><b><b><b></b></b></b></b></span><br /><span class="Apple-style-span" style="font-weight: normal;"><b><b><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 
0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">had a range of&nbsp;<b><b><b><b><b></b></b></b></b></b></span></div><span class="Apple-style-span" style="font-weight: normal;"><b><b><b><b><b></b></b></b></b></b></span><br /><span class="Apple-style-span" style="font-weight: normal;"><b><b><b><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">$6.18 between its 16 buy recommendations.</span></div></div></b></b></b></b></b></span></div></b></b></b></b></span></div></b></span><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzCs-f11DI/AAAAAAAAAeQ/Nb6UZLS9RQE/s1600/jim_cramer_WFT.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzCs-f11DI/AAAAAAAAAeQ/Nb6UZLS9RQE/s400/jim_cramer_WFT.png" width="400" /></a></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><b><b><b></b></b></b></span></div><b><b><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><b></b></span></div><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><br /></span></div></b></b></b></b><br /><span class="Apple-style-span" style="font-weight: normal;"><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">NVIDIA 
(NVDA) &nbsp;had a range of 9.05 between its 15 buy recommendations.</span></div></div></b></span><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCzuy17OI/AAAAAAAAAeU/kiOW-pLhocQ/s1600/jim_cramer_NVDA.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzCzuy17OI/AAAAAAAAAeU/kiOW-pLhocQ/s400/jim_cramer_NVDA.png" width="400" /></a></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><b></b></span><br /><b></b><br /><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">Cypress Semiconductor (CY) had a range of 4.58 between its 13 buy recommendations.</span></div></div></b></div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzC3adPLPI/AAAAAAAAAeY/-ZWlQWdDFbs/s1600/jim_cramer_CY.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLzC3adPLPI/AAAAAAAAAeY/-ZWlQWdDFbs/s400/jim_cramer_CY.png" width="400" /></a></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;"><b></b></span><br /><b></b><br /><b><div style="display: inline !important; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">Teva Pharmaceutical (TEVA) had a 17.03 range in its 12 buy recommendations.</span></div></div></b></div><br /><div class="separator" style="clear: both; text-align: center;"><a 
href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TLzC9MColUI/AAAAAAAAAec/rirZeq5wsYk/s1600/jim_cramer_TEVA.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TLzC9MColUI/AAAAAAAAAec/rirZeq5wsYk/s400/jim_cramer_TEVA.png" width="400" /></a></div><br />Conclusion<br /><span class="Apple-style-span" style="font-weight: normal;">We are bombarded with predictions and promises regularly in the news and media. Over time, I have grown more and more suspicious of the ability of individuals to consistently predict stock prices by simply having a superficial knowledge of current market motions and a general awareness of current financial news. &nbsp;</span><br /><span class="Apple-style-span" style="font-weight: normal;"> </span> <span class="Apple-style-span" style="font-weight: normal;"><b></b></span><br /><b><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-weight: normal;">&nbsp;I often think about verifying the claims, but often don't have the time... figured I would at least take a cursory look. &nbsp;I hope this sort of thing becomes more common to keep the media honest. &nbsp;<b></b></span></div><b><div style="display: inline !important;"><span class="Apple-style-span" style="font-weight: normal;">At least Jim Cramer's show is cast somewhat in the realm of entertainment. &nbsp;Again, I'll refer you to <a href="http://online.barrons.com/article/SB118681265755995100.html">Bill Alpert of Barrons</a> who has done more extensive analysis and reporting on Cramer's recommendations. &nbsp;</span></div></b></b></b></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/7793013875051299367/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations_18.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7793013875051299367"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7793013875051299367"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations_18.html" title="Cramer's Stock Pick Recommendations Analyzed (Part II)"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLzFo4s-JuI/AAAAAAAAAeg/4y2xqNdiH1c/s72-c/R_With_Jim_Cramer2.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-2241698458412841038</id>
<published>2010-10-16T15:56:00.000-07:00</published>
<updated>2010-10-16T15:56:30.551-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<title type="text">2010 ggplot2 Case Study Competition Winners</title>
<content type="html"><table><tbody><tr><td><br /><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=0387981403" style="height: 240px; width: 120px;"></iframe></td> <td><br />The 2010 ggplot2 Case Study Competition Winners <a href="http://github.com/hadley/ggplot2/wiki">have been announced</a>! &nbsp;Congratulations to the winners!<br /><br /><br /><ul><li><a href="http://github.com/hadley/ggplot2/wiki/Crime-in-Downtown-Houston,-Texas-:-Combining-ggplot2-and-Google-Maps">Grand Prize</a>:&nbsp;David Kahle, Rice University</li><li><a href="http://github.com/hadley/ggplot2/wiki/Mapping-electrical-activity-in-the-human-neocortex">Finalist</a>:&nbsp;Michael Lavine, UMass Amherst&nbsp;</li><li><a href="http://github.com/hadley/ggplot2/wiki/Case-Study:-Raman-Spectroscopic-Grading-of-Gliomas">Finalist</a>: Claudia Beleites, TU Dresden &amp; Uni. Trieste</li></ul><br />Check out their entries to get a glimpse of what is possible with R and ggplot2.</td></tr></tbody></table></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/2241698458412841038/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/2010-ggplot2-case-study-competition.html#comment-form" title="2 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2241698458412841038"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2241698458412841038"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/2010-ggplot2-case-study-competition.html" title="2010 ggplot2 Case Study Competition Winners"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>2</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-1700837471508690785</id>
<published>2010-10-16T13:07:00.000-07:00</published>
<updated>2010-10-18T09:47:18.782-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="quantmod"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Cramer"/>
<title type="text">Cramer's Stock Pick Recommendations Analyzed</title>
<content type="html"><div class="MsoNormal"><div class="separator" style="clear: both; text-align: center;"></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s1600/R_With_Jim_Cramer2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s1600/R_With_Jim_Cramer2.png" /></a></div><br />Bill Alpert of Barron’s has demonstrated the use of R in financial journalism as he <a href="http://online.barrons.com/article/SB118681265755995100.html">criticized</a> the performance of <a href="http://www.cnbc.com/id/15838187/">Jim Cramer’s</a> stock picks.&nbsp;&nbsp;Patrick Burns was an <a href="http://burns-stat.com/pages/Working/cramer_vs_pseudocramer.pdf">advisor for the analysis</a> done in the article. &nbsp;R was an important tool that allowed them to do their research as indicated by Burns in <a href="http://burns-stat.com/pages/Working/cramer_vs_pseudocramer.pdf">his article</a> and by Alpert in <a href="http://www.flickr.com/photos/palewire/4427731028/">his presentation</a>&nbsp;and in an <a href="http://cran.r-project.org/doc/Rnews/Rnews_2007-3.pdf">article</a> published in R News. &nbsp;Their specific critiques were centered around obtaining objective verification of claims by CNBC that by following Cramer's advice, one could beat the S &amp; P 500 index. 
&nbsp; I refer you to these sources if you are interested in a more comprehensive analysis.</div><div class="MsoNormal"><br /></div><div class="MsoNormal">The following is based upon data available on the Mad Money Stock Screener as of&nbsp;10/15/2010.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Mad Money Stock Screener</b></div><div class="MsoNormal">Since Cramer's <a href="http://www.thestreet.com/mad-money/index.html">Mad Money Stock Screener</a> is available on line there is at least an "unofficial" group of Cramer's recommendations available to analyze. &nbsp;It is apparent that the data is not complete, as several program segments in the drop down are not represented. &nbsp;Selecting any of the following segments results in no records returned:</div><div class="MsoNormal"></div><div class="MsoNormal"></div><ul><li>Caller's Stock</li><li>Game Plan</li><li>Sudden Death</li></ul><br /><br /><div class="MsoNormal">In addition, the date provided includes month and day (but not year information). &nbsp;It appears that data from one calendar year is available through the web site. &nbsp;This was borne out by plotting the price at the time of the recommendation on charts for individual equities.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Always A Bull Market Somewhere</b></div><div class="MsoNormal">According to his profile, "Jim Cramer believes that there is always a bull market somewhere, and he wants to help you find it". &nbsp;His optimistic, entertaining and confident approach that he exudes on screen is reflected in his history of stock picks. 
&nbsp;Recommendations are indicated either by a number below or by name:</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"># &nbsp; Description</span></b></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">5 &nbsp; Buy</span></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">4 &nbsp; Positive</span></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">3 &nbsp; Hold</span></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">2 &nbsp; Negative</span></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">1 &nbsp; Sell</span></div><div class="MsoNormal"><br /></div><div class="MsoNormal">Cramer is functioning as an entertainer with financial knowledge. &nbsp;The show is geared towards providing action oriented advice (buy/sell) and tends to be skewed towards positive actions. &nbsp;This fits with his profile description - if there is always a bull market, there is always something to buy. 
&nbsp;</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Analysis of All Recommendations</b></div><div class="MsoNormal">The following jitter demonstrates that a 5 (Buy Recommendation) is most often given, and the smoothing indicates that Cramer is generally positive in his ratings.</div><div class="MsoNormal"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyy8EeU6I/AAAAAAAAAdY/uJMGEQY8qzU/s1600/jim_cramer_recommendations_by_segment_jitter.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyy8EeU6I/AAAAAAAAAdY/uJMGEQY8qzU/s400/jim_cramer_recommendations_by_segment_jitter.png" width="400" /></a></div><div class="MsoNormal">The program segment in use might also shed some light on the recommendations given. &nbsp;As mentioned earlier, not every program segment is represented in the data.</div><div class="MsoNormal"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyQ4XpPTI/AAAAAAAAAdU/l7LQGWgllac/s1600/jim_cramer_recommendations_by_segment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLnyQ4XpPTI/AAAAAAAAAdU/l7LQGWgllac/s400/jim_cramer_recommendations_by_segment.png" width="400" /></a></div><div class="MsoNormal">The specific totals represented above:</div><div class="MsoNormal"><br /></div><div class="MsoNormal"></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; <b>Buy &nbsp; &nbsp; Hold Negative Positive &nbsp; &nbsp; Sell&nbsp;</b></span></div><div class="MsoNormal"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp;2336 &nbsp; &nbsp; &nbsp; &nbsp;7 &nbsp; &nbsp; &nbsp;229 &nbsp; 
&nbsp; &nbsp;422 &nbsp; &nbsp; &nbsp;559&nbsp;</span></div><div><br /></div><br /><div class="MsoNormal">The vast majority of the time, Cramer gives a buy recommendation. &nbsp;The second most often provided recommendation is to sell. &nbsp;A clear "Buy" or "Sell" is certainly more entertaining to hear than a "Meh... &nbsp;hold." &nbsp;None of this in and of itself means that Cramer's ratings are bad or inferior to other sources. &nbsp;It simply serves to illustrate that the program is geared towards entertainment. &nbsp;At best, one might hope that only the clear winners are topics of conversation on the program. &nbsp;However, further analysis at least calls this into question.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Individual Stock Recommendations</b></div><div class="MsoNormal">Not every company has an equal number of recommendations. &nbsp;The top 5 (in terms of total recommendations made) are Apple, Citigroup, Intel, Bank of America and Ford Motor Company. &nbsp;In order to get a sense of a given&nbsp;recommendation&nbsp;in the scope of wider market history, the data from the stock screener can be superimposed on a stock chart.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Apple</b></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLn5bzqcKKI/AAAAAAAAAdc/7Nuoa1YyX9c/s1600/jim_cramer_AAPL.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLn5bzqcKKI/AAAAAAAAAdc/7Nuoa1YyX9c/s400/jim_cramer_AAPL.png" width="400" /></a></div><div class="MsoNormal"><br /></div><div class="MsoNormal">Apple was given a&nbsp;recommendation&nbsp;on 90&nbsp;occasions (more than twice as many as the next most popular companies recommended). The&nbsp;average recommendation was 4.933333 and Cramer recommended Buy 84 times and Positive 6 times. 
&nbsp;The clear upward trend in Apple's price justifies an optimistic view in recent history. &nbsp;None of the remaining stocks in the top 5 recommended fits this pattern though.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Citigroup</b></div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn61ZxthTI/AAAAAAAAAdg/YaglqDPl2BI/s1600/jim_cramer_C.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn61ZxthTI/AAAAAAAAAdg/YaglqDPl2BI/s400/jim_cramer_C.png" width="400" /></a></div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b><span class="Apple-style-span" style="font-weight: normal;">Citigroup&nbsp;was given a&nbsp;recommendation&nbsp;on 40&nbsp;occasions and had&nbsp;an average recommendation of&nbsp;4.850000; Cramer recommended Buy 36 times and Positive 3 times and Negative 1 time. &nbsp;The negative rating was on the 12/08/2009 Lightning Round. &nbsp;</span></b></div><div class="MsoNormal"><b><span class="Apple-style-span" style="font-weight: normal;"><br /></span></b></div><div class="MsoNormal"><b>Intel</b></div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn-xQJ39lI/AAAAAAAAAdo/EDA97hkhfjo/s1600/jim_cramer__INTC.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TLn-xQJ39lI/AAAAAAAAAdo/EDA97hkhfjo/s400/jim_cramer__INTC.png" width="400" /></a></div><div class="MsoNormal"><b><br /></b></div><div class="MsoNormal"><b><br /></b></div><div class="MsoNormal">Intel was tied for second place with Citigroup with 40 recommendations. &nbsp;It had an average recommendation of 4.900000. 
&nbsp;He recommended&nbsp;Buy on 36 occasions and&nbsp;Positive on 4 occasions.</div><div class="MsoNormal"></div><div class="MsoNormal"><br /></div><br /><div class="MsoNormal"><b>Bank of America</b>&nbsp;</div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLoAI0ZrHHI/AAAAAAAAAds/QWPm5G8iBrw/s1600/jim_cramer_BAC.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLoAI0ZrHHI/AAAAAAAAAds/QWPm5G8iBrw/s400/jim_cramer_BAC.png" width="400" /></a></div><div class="MsoNormal"><br /></div><div class="MsoNormal">Bank of America had an average of 4.820513 in the 39 times it was recommended. &nbsp;It was given a Buy recommendation 34 times, a Positive 4 times and a Negative once (during the 11/24/2009&nbsp;Lightning Round).</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Ford Motor&nbsp;</b></div><div class="MsoNormal">Ford Motor Company was given an average of 4.777778 in the 36 times it was recommended (a Buy 28 times and a Positive 6 times).</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>A Negative Example</b></div><div class="MsoNormal">One additional stock that is of interest is British&nbsp;Petroleum&nbsp;(BP), which had a rough ride this year due to the <a href="http://www.r-chart.com/2010/06/plotting-bp-oil-spill-testing-data.html">Deepwater Horizon Oil Spill</a>. 
&nbsp;</div><div class="MsoNormal"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLoCljdYhcI/AAAAAAAAAdw/FXa-htDiB5E/s1600/jim_cramer_BP.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TLoCljdYhcI/AAAAAAAAAdw/FXa-htDiB5E/s400/jim_cramer_BP.png" width="400" /></a></div><div class="MsoNormal"><br /></div><div class="MsoNormal"><br /></div><div class="MsoNormal">Cramer issued a Sell&nbsp;recommendation&nbsp;on 10 occasions, a Positive on 1 occasion and a Buy on 6 Occasions (a total of 17 recommendations).</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><b>Conclusion</b></div><div class="MsoNormal">I'll leave it to you to draw your own conclusions about how to interpret recommendations given by Jim Cramer. &nbsp;For myself, I find him entertaining and well informed about financial news for a wide range of stocks. &nbsp;He also has the track record as a successful hedge fund manager over the course of a number of years. &nbsp;However, I am skeptical about the ability of analysts to&nbsp;consistently&nbsp;predict the direction of the market. &nbsp;</div><div class="MsoNormal"><br /></div><div class="MsoNormal">Depending upon the reception of this post, I can provide additional information about the methods used to obtain the data and create the charts above and also show how other stocks recommended on Mad Money have performed. &nbsp;Let me know if you have any interest in the comments.</div><div class="MsoNormal"><br /></div><div class="MsoNormal"><br /></div><div class="MsoNormal"><br /></div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/1700837471508690785/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1700837471508690785"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/1700837471508690785"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/cramers-stock-pick-recommendations.html" title="Cramer's Stock Pick Recommendations Analyzed"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_FsLa1cMTCWU/TLx6DP7sw3I/AAAAAAAAAd4/rL18W-q7TTE/s72-c/R_With_Jim_Cramer2.png" height="72" width="72"/>
<thr:total>1</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-2838026290767710922</id>
<published>2010-10-16T06:46:00.000-07:00</published>
<updated>2010-10-16T10:13:32.402-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="fractals"/>
<title type="text">Benoit Mandelbrot (the Father of Fractals) dead at 85</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Mandel_zoom_08_satellite_antenna.jpg/800px-Mandel_zoom_08_satellite_antenna.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="240" src="http://upload.wikimedia.org/wikipedia/commons/thumb/f/fc/Mandel_zoom_08_satellite_antenna.jpg/800px-Mandel_zoom_08_satellite_antenna.jpg" width="320" /></a></div><br /><br /><a href="http://www.fooledbyrandomness.com/">Nicholas Taleb</a>&nbsp;and&nbsp;<a href="http://twitter.com/#search?q=benoit%20mandelbrot">Recent tweets</a>&nbsp;indicate that <a href="http://kottke.org/10/10/benoit-mandelbrot-rip">Benoit Mandelbrot has died</a> at age 85. &nbsp;Mandelbrot was a French and American mathematician, best known as the father of fractal geometry. &nbsp;His official <a href="http://www.math.yale.edu/mandelbrot/">biography at Yale</a>&nbsp;does not yet reflect this (it was last updated in March 2010) and his <a href="http://en.wikipedia.org/wiki/Beno%C3%AEt_Mandelbrot">Wikipedia page</a> is protected from editing&nbsp;for the moment.<br /><br />UPDATE: &nbsp;The <a href="http://www.nytimes.com/2010/10/17/us/17mandelbrot.html?_r=2">New York Times</a> has confirmed this.<br /><br />He is known for&nbsp;changing the way researchers perceive and characterize the phenomenon of natural growth and for the implications for his work for scientists and mathematicians. &nbsp;But fractals have become iconic to the masses as expressive of the idea that beauty and mathematics are inextricably related. &nbsp;As I <a href="http://www.r-chart.com/2010/08/fractals-in-r.html">mentioned&nbsp;elsewhere</a>&nbsp;my brother and I were inspired at an early age by the wonderful designs that could be created by simple mathematical equations. <br /><br />He will be missed.</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/2838026290767710922/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2838026290767710922"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/2838026290767710922"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/benoit-mandelbrot-father-of-fractals.html" title="Benoit Mandelbrot (the Father of Fractals) dead at 85"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>1</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-487357515084765504</id>
<published>2010-10-06T08:45:00.000-07:00</published>
<updated>2010-10-06T08:45:15.207-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="fitness"/>
<title type="text">Belgian Astronomers and Exercise Machines</title>
<content type="html"><div class="separator" style="clear: both; text-align: left;"><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s1600/BMI.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="398" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s400/BMI.png" width="400" /></a></div><br />In the twisting paths of human discovery, you never quite know what intellectual enterprise is going to result in a world changing discovery. &nbsp;For instance, the mathematical notion of <a href="http://en.wikipedia.org/wiki/Expected_value">expected value</a> did not grow up in a sterile, academic environment. &nbsp;&nbsp;In 1654 Blaise Pascal was approached by Chevalier de Méré who was interested in gambling problems. &nbsp;Pascal corresponded with Fermat and thus the mathematical <a href="http://en.wikipedia.org/wiki/Theory_of_probability">theory of probabilities</a> was born. <br /><br />In recent days reports on economic upheaval have often cast financial industries as institutions based upon greed and power that contribute nothing of value to society. &nbsp;Defenders of the free market are quick to respond with the immediate economic benefits provided by such institutions as they mitigate risk and serve as "middle men" in modern financial markets. &nbsp;What is seldom considered is that discoveries in one area often find application in a separate area of life that was never considered during the initial investigation. &nbsp;And so one day, perhaps Wall Street calculations might be put to non-financial use that benefit mankind in other ways. &nbsp;There is historical precedent. &nbsp;For instance, interests in the insurance industry served to popularize and apply a 19th century Belgian calculation in a manner that is now used on modern exercise machines. 
<br /><br /><b>A Belgian Astronomer: Adolphe Quetelet</b><br /><a href="http://ndt.oxfordjournals.org/content/23/1/47.full">Adolphe Quetelet</a> (1796–1874) was a Belgian mathematician, astronomer and statistician. &nbsp;While studying astronomical activities in Paris he interacted with Joseph Fourier (1768–1830), Siméon Poisson (1781–1840) and Pierre Laplace (1749–1827). &nbsp; He went on to put his new found appreciation of probability to practical use in the study of the human body (a subject he had initially approached as a painter and sculptor). &nbsp;One calculation he created, dubbed the Quetelet Index, is a number that expresses a relationship between a person's height and weight. &nbsp;Quetelet was not specifically interested in the use of his index for health purposes, but simply for defining the characteristics of "normal" or <a href="http://ndt.oxfordjournals.org/content/23/1/47.full">"average" man</a>.<br /><br /><b>The Financial Industry</b><br />In the mid 20th century actuaries observed increased mortality in overweight policyholders. &nbsp;And so in an effort to construct more accurate mortality tables the relationship between weight and cardiovascular disease became the subject of epidemiological studies. &nbsp;Weight tables were first used to predict life expectancy as far back as 1913. &nbsp;But tables of ideal or desirable weight were developed by the Metropolitan Life Insurance Company in the 1940s. &nbsp;In the 1960s, a small group of insurance industry experts began to use the Quetelet Index. 
&nbsp;But it remained for an American scientist to perform a comparative study of available indices and rename the Quetelet Index to the form that we know it today where it has become a subject related to <a href="http://profiles.nlm.nih.gov/NN/B/C/Q/W/_/nnbcqw.ocr">health and nutrition</a>.<br /><br /><b>An American Oceanographer, Biologist, and Physiologist</b><br /><a href="http://en.wikipedia.org/wiki/Ancel_Keys">Ancel Benjamin Keys</a> was a scientist who wrote an article for the July 1972 "Journal of Chronic Diseases" that <a href="http://www.ncbi.nlm.nih.gov/pubmed/17890752">coined the phrase "body mass index"</a> or <a href="http://en.wikipedia.org/wiki/Body_mass_index">BMI</a> as a modern designation for the Quetelet Index. &nbsp;Interestingly enough, Keys's early studies culminated in a B.A. in economics and political science in 1925. &nbsp;His first Ph.D. was in oceanography and biology but his later work was related to his second Ph.D., which focused on physiology. &nbsp;He is best known for two dietary contributions - the <a href="http://en.wikipedia.org/wiki/K-ration">K-Ration</a> and the <a href="http://en.wikipedia.org/wiki/Mediterranean_diet">Mediterranean Diet</a>.<br /><br />Keys (and <a href="http://www.slate.com/id/2223095/">others today</a>) considered the tendency in the insurance industry to equate relative body weight with excess risk of death to be <a href="http://mbbnet.umn.edu/firsts/blackburn_h.html">somewhat simplistic</a>. &nbsp;There is worldwide variation according to diet and physical activity habits. In most industrial countries people in the middle range of body weight are healthier than those at an extreme.<br /><br />Because of these types of concerns, BMI is often considered along with other concerns that can indicate potential health risks. 
&nbsp;<a href="http://www.win.niddk.nih.gov/publications/tools.htm">Specifically</a>:<br /><br /><ul><li>A BMI in the overweight category along with certain diseases&nbsp;</li><li>A BMI of less than 25 and a waist size above the standard (35 for women or 40 for men)&nbsp;</li></ul><br />The actual BMI ranges considered healthy or at risk are still being debated. &nbsp;In 1998, the U.S. National Institutes of Health changed U.S. definition of normal from 27.8 to 25 to conform to <a href="http://apps.who.int/bmi/index.jsp?introPage=intro_3.html">World Health Organization Standards</a>. &nbsp;In addition, other countries in the world are encouraging the upper limit for BMI to be even lower than 25.<br /><br /><b>BMI and R</b><br />One’s optimal weight can be derived using the BMI and height as follows:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">optimal_weight = function (height, bmi){</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp;round((height**2 * bmi) / 703)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">}</span></span><br /><br />A grid, similar to one found <a href="http://www.nhlbi.nih.gov/guidelines/obesity/bmi_tbl.htm">here</a>&nbsp;and the chart above can be constructed a <a href="http://github.com/ezgraphs/R-Programs/blob/master/bmi.R">script found at GitHub</a>.<br /><br />For example:<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&gt; create_bmi_dataframe(bmi_end=30)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier 
New', Courier, monospace;">&nbsp;&nbsp; &nbsp;19 &nbsp;20 &nbsp;21 &nbsp;22 &nbsp;23 &nbsp;24 &nbsp;25 &nbsp;26 &nbsp;27 &nbsp;28 &nbsp;29 &nbsp;30</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">60 &nbsp;97 102 108 113 118 123 128 133 138 143 149 154</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">61 101 106 111 116 122 127 132 138 143 148 153 159</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">62 104 109 115 120 126 131 137 142 148 153 159 164</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">63 107 113 119 124 130 135 141 147 152 158 164 169</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">64 111 117 122 128 134 140 146 151 157 163 169 175</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">65 114 120 126 132 138 144 150 156 162 168 174 180</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">66 118 124 130 136 143 149 155 161 167 173 180 186</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">67 121 128 134 140 147 153 160 166 172 179 185 192</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;">68 125 132 138 145 151 158 164 171 178 184 191 197</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">69 129 135 142 149 156 163 169 176 183 190 196 203</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">70 132 139 146 153 160 167 174 181 188 195 202 209</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">71 136 143 151 158 165 172 179 186 194 201 208 215</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">72 140 147 155 162 170 177 184 192 199 206 214 221</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">73 144 152 159 167 174 182 190 197 205 212 220 227</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">74 148 156 164 171 179 187 195 203 210 218 226 234</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">75 152 160 168 176 184 192 200 208 216 224 232 240</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">76 156 164 173 181 189 197 205 214 222 230 238 246</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">77 160 169 177 186 194 202 211 219 
228 236 245 253</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">78 164 173 182 190 199 208 216 225 234 242 251 260</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">79 169 178 186 195 204 213 222 231 240 249 257 266</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">80 173 182 191 200 209 218 228 237 246 255 264 273</span></span><br /><br />There is some variation with the government site – perhaps related to rounding.</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/487357515084765504/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/belgian-astronomers-and-exercise.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/487357515084765504"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/487357515084765504"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/belgian-astronomers-and-exercise.html" title="Belgian Astronomers and Exercise Machines"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKyZWjjRgxI/AAAAAAAAAdQ/llS8uX5lGeQ/s72-c/BMI.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-571095303318431682</id>
<published>2010-10-04T16:10:00.000-07:00</published>
<updated>2010-10-04T16:10:48.086-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Ruby"/>
<category scheme="http://www.blogger.com/atom/ns#" term="fitness"/>
<title type="text">Max Heart Rate Calculations Compared</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s1600/R_Heart.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s1600/R_Heart.png" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><br />Physical fitness has become increasingly technical and data driven. &nbsp;I started running a bit in the last few months and have been delving into the prevailing wisdom related to assessing ones health as a baseline for pursuing various fitness goals. &nbsp;Some of the terms related to tracking a heart rate gave me visions of white lab coats, cardiac monitors and sophisticated formulas based upon years of scientific analysis. &nbsp;And while there may be truth to this, the practical reality is quite a bit simpler.<br /><div style="text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TKpYJ4Lg1OI/AAAAAAAAAc8/6rmE4n7hdWQ/s1600/max_heart_rate_calculation_methods.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TKpYJ4Lg1OI/AAAAAAAAAc8/6rmE4n7hdWQ/s320/max_heart_rate_calculation_methods.png" style="cursor: move;" width="320" /></a></div><br />In many workout routines, a <a href="http://www.americanheart.org/presenter.jhtml?identifier=4736">target heart rate</a> is&nbsp;calculated which is supposed to identify a range (usually in beats per minute) during an exercise routine&nbsp;that will provide optimal cardiovascular value. &nbsp;The basic idea is that you want a work out that is rigorous enough to derive a benefit from the exercise without harming your body. &nbsp;It is appealing in that it provides an objective measure to evaluate your workout. 
&nbsp;And once your workout can be measured, it is possible to set goals and work to improve your health.<br /><br />What is implied in the idea of a target heart rate is that there is some upper limit that cannot safely be exceeded. &nbsp;You might think that you need to be hooked up to a bunch of cardiac sensors to find out this value - and although this might be optimal, it is not the technique used by most folks. &nbsp;Instead, there are relatively simple formulas that are used to calculate a maximum heart rate for an individual. &nbsp;They are usually based only upon age (although some calculations consider gender as well).<br /><br /><table><tbody><tr><td><b>Maximum Heart Rate Calculations</b><br /><a href="http://en.wikipedia.org/wiki/Heart_rate">Various formulas</a>&nbsp;(most of them simple linear formulas at that) have been devised to estimate individual Maximum Heart Rates. &nbsp;However, actual maximum heart rates vary significantly&nbsp;between individuals based upon physiology, physical fitness and other factors&nbsp;so the value of the metric is disputed. &nbsp;Nevertheless, I was interested in comparing the available formulas to get a sense of a range (based upon "ensembling" if you will) of what is being reported or suggested by health sites, software and machines that use this value. <br /><br />One of the gizmos I have begun using is the Garmin GPS with heart rate monitor. &nbsp;I am impressed with its performance so far. 
&nbsp;It includes its own software that does most of the types of data aggregation and summary that you would like - but I look forward to geeking out and seeing what can be done with the data in R in later posts.</td><td><br /><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=B000CSWCQA" style="height: 240px; width: 120px;"></iframe></td></tr></tbody></table><br /><b>Method</b><br />A <a href="http://github.com/ezgraphs/Ruby-Scripts/blob/master/heart_rate_max.rb">ruby script</a>&nbsp;was used to create a semicolon delimited file with the maximum heart rate from ages 18 through 90 for various calculation methods described in the <a href="http://en.wikipedia.org/wiki/Heart_rate">Wikipedia article</a>. &nbsp;The <a href="http://github.com/ezgraphs/R-Programs/blob/master/heart_rate_max.csv">resulting data</a>&nbsp;can be read into an <a href="http://github.com/ezgraphs/R-Programs/blob/master/heart_rate_max.R">R script</a> to produce the charts in this blog. <br /><br />&nbsp;A summary that combines all of the calculations does not make a whole lot of sense since two of the calculations in use are for women only and one is for men only. &nbsp;However, all of the techniques fit within a relatively narrow range (since we human beings aren't quite that random). 
&nbsp;Besides, the two calculations for women are among the most divergent presented, and so cancel each other out in part (though they probably pull down the average for the younger and older ends of the spectrum).<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s1600/all_methods_smoothed.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s320/all_methods_smoothed.png" width="320" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: left;">This average is included in the chart below - which is easier to see if you generate it yourself and stretch it to a size suitable for your monitor.</div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZpS0CO2I/AAAAAAAAAdE/Rn-gUpVBtUE/s1600/all_methods_smoothed.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"></a><a href="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKpZmegkceI/AAAAAAAAAdA/BV9tWLzRVc4/s1600/all_methods_smoothed_points.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://1.bp.blogspot.com/_FsLa1cMTCWU/TKpZmegkceI/AAAAAAAAAdA/BV9tWLzRVc4/s320/all_methods_smoothed_points.png" width="320" /></a></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;">The only input value considered in calculations is gender - two of which are specific to women and one for men. 
&nbsp;It seems that the most popular calculations don't bother with gender anyway.</div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZqxjKanI/AAAAAAAAAdI/sE1y18LsPSc/s1600/women_heart_rate_max.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpZqxjKanI/AAAAAAAAAdI/sE1y18LsPSc/s320/women_heart_rate_max.png" width="320" /></a></div>There are a number of possibilities for using R with fitness devices that provide heart rate information, geographic data, time, distance, caloric intake and consumption, etc. &nbsp;I was not able to find much in the way of open source fitness related calculation software APIs &nbsp;- so this could be a new area for R developers to address. &nbsp;(It also provides some balance to the relatively sedentary life of developing and blogging).</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/571095303318431682/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/10/max-heart-rate-calculations-compared.html#comment-form" title="2 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/571095303318431682"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/571095303318431682"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/10/max-heart-rate-calculations-compared.html" title="Max Heart Rate Calculations Compared"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_FsLa1cMTCWU/TKpesmcsmpI/AAAAAAAAAdM/Xw11_Hgr95c/s72-c/R_Heart.png" height="72" width="72"/>
<thr:total>2</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-5372288810399868062</id>
<published>2010-09-24T16:10:00.000-07:00</published>
<updated>2010-09-24T16:20:13.911-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="Data Preparation"/>
<title type="text">Find Duplicate Records in a File</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"></div><span class="Apple-style-span" style="-webkit-border-horizontal-spacing: 2px; -webkit-border-vertical-spacing: 2px;"><div><br /></div><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s1600/R_duplicate_record.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s1600/R_duplicate_record.PNG" /></a></div><div><br /></div><div>In the world of data preparation a common task is to identify duplicate records in a file or data set. &nbsp;A few years ago, I did most development work in Java, and shudder to think of the amount of code required to accomplish this sort of task. &nbsp;Since that time I have been involved in many projects that did not require programming in a specific language, but simply "getting the job done." &nbsp;With that in mind, "removal of duplicate records in a file" can be construed as manipulating a data set rather than an exercise in file processing. 
&nbsp;The following shows how R compares with other technologies when performing this task.</div><div><br /></div><div><br /></div><div>A file named "file.txt" containing semicolon delimited records appears in the examples below:</div><div><br /></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1;abc;123;etc</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1234;qwer;4321;etc</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1235;asdf;4341;etc</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1;abc;123;etc</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">1234;qwer;5555;etc</span></span></div><div><br /></div><div><br /></div><div>Identifying rows in which every field is duplicated is relatively straight forward using Unix utilities. &nbsp;The file can be outputted using the cat (concatenate) command, the results sorted, and a unique list of results (prefaced by the count of occurrences in the file) can be filtered by a regular expression that indicates any row that has a number of occurrences not equal to one.</div><div><br /></div><div>&nbsp;<span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;cat file.txt | sort | uniq -c | grep "^ &nbsp; [^1 ]"</span></span></div><div><br /></div><div>Often, it is more concise to speak Unix than English. 
&nbsp;This starts to break down a bit when considering each line in the file as a record with distinguishable fields.</div><div><br /></div><div>General purpose programming languages can do the same thing, but are a bit more verbose. &nbsp;In ruby, the file can be read into an array of arrays.</div><div><br /></div><div>&nbsp;<span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp;</span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">a=File.open('file.txt').readlines.map{|l|l.split(';')}</span></span></div><div><br /></div><div>A list of unique rows can be outputted using the following one liner:</div><div><br /></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp;a.uniq.each{|l|puts l.join(';')}</span></span></div><div><br /></div><div>And with a bit more effort, you can write a program that will filter the results as needed. 
&nbsp;This type of processing can also be done declaratively in R where such a file is read in as a data frame.</div><div><br /></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp;&nbsp;</span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df=read.csv('file.txt',sep=';',header=FALSE)</span></span></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; unique(df)</span></span></div><div><br /></div><div>The duplicated function can also be used to identify the single row that is duplicated.</div><div><br /></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; df[duplicated(df),]</span></span></div><div><br /></div><div>That is to say, all fields in all records in the following data frames are equal.</div><div><br /></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; unique(df)==df[!duplicated(df),]</span></span></div><div><br /></div><div>The situation gets a bit more complicated when you want to only use some of the delimited fields to identify duplicate records. &nbsp;In the data set above, rows 1 and 4 are identical. &nbsp;Consider the requirement to recognize lines 2 and 5 as identical (due to the first two fields matching). &nbsp;In Unix, you could use awk with the -F option to process the delimited fields. &nbsp;In ruby you could store key fields in variables and compare them with each row. 
&nbsp;If you come from the SQL world, you could use the R sqldf package to treat the data frame as a database table.</div><div><br /></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;&nbsp; sqldf('select * from df group by V1, V2 having count(*) &gt;1')</span></span></div><div><br /></div><div>The R way of getting this information is to identify the indices of the duplicated rows.</div><div><br /></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; df[duplicated(df[c(1,2)]),]</span></span></div><div><br /></div><div><br /></div><div>I enjoy looking at the overlapping aspects of programming languages and utilities (like <a href="http://www.oracle.com/technology/pub/articles/saternos-filtering.html">this OTN Article</a> from a few years ago). &nbsp; It is helpful for highlighting the right tool for the right job, and aids in communication with other technical professionals. &nbsp;</div></span></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/5372288810399868062/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/09/find-duplicate-records-in-file.html#comment-form" title="7 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/5372288810399868062"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/5372288810399868062"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/09/find-duplicate-records-in-file.html" title="Find Duplicate Records in a File"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJ0xg1VG4QI/AAAAAAAAAcc/qLZWC4C3-m4/s72-c/R_duplicate_record.PNG" height="72" width="72"/>
<thr:total>7</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-9014193283880751967</id>
<published>2010-09-22T16:19:00.000-07:00</published>
<updated>2010-09-22T16:19:28.493-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="World Bank Data"/>
<title type="text">New World Bank Data Available</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s1600/totalpopulation.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="319" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s320/totalpopulation.png" width="320" /></a></div><br />Just announced: &nbsp;World Bank Data <a href="http://data.worldbank.org/news/new-features">features and data</a> are available. &nbsp;<a href="http://www.r-chart.com/search/label/World%20Bank%20Data">Previous posts</a> have demonstrated how to access and plot this data using R (including the use of the R <a href="http://cran.r-project.org/web/packages/WDI/index.html">WDI package</a>). &nbsp;The chart above can be created using the following program in R.<br /><br /><span class="Apple-style-span" style="color: #333333; line-height: 18px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">library(ggplot2)<br />library(WDI)</span></span></span><br /><span class="Apple-style-span" style="color: #333333; line-height: 18px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">DF &lt;- WDI(country=c("US","FR","DE","GB","CN","RU","IN"), indicator="SP.POP.TOTL", start=1990, end=2008)</span></span><br /><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span></div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" 
style="font-size: small;">ggplot(DF, aes(year, SP.POP.TOTL, color=country))+geom_line(stat="identity")+theme_bw()+xlab("Year")+opts(title="Total Population")+ylab("")</span></span><br /><div><span class="Apple-style-span" style="color: #333333; font-family: 'Courier New', Courier, monospace; font-size: small; line-height: 18px;"><span class="Apple-style-span" style="color: black; font-family: 'Times New Roman'; font-size: medium; line-height: normal;"><br /></span></span></div></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/9014193283880751967/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/09/new-world-bank-data-available.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/9014193283880751967"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/9014193283880751967"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/09/new-world-bank-data-available.html" title="New World Bank Data Available"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://2.bp.blogspot.com/_FsLa1cMTCWU/TJqOTzDi7GI/AAAAAAAAAcM/_GAp7GAkwEA/s72-c/totalpopulation.png" height="72" width="72"/>
<thr:total>1</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-718438522319707387</id>
<published>2010-09-18T11:29:00.000-07:00</published>
<updated>2010-09-18T11:29:32.263-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="Conferences"/>
<title type="text">Elder Research Two Day Course</title>
<content type="html">... or what I did on my summer vacation...<br /><br /><a href="http://www.blogger.com/"></a><span id="goog_183505738"></span><span id="goog_183505739"></span>Just got back from the <a href="http://www.datamininglab.com/Training/Annual2DayCourse/tabid/66/Default.aspx">Elder Research Two Day Course</a>&nbsp;"<a href="http://www.datamininglab.com/Training/Annual2DayCourse/tabid/66/Default.aspx">Tools for Discovering Patterns in Data</a>". &nbsp;It was a great course that (while not R specific) provides a great overview of Data Mining tools and techniques and insight into current applications in a wide variety of industries. &nbsp; <br /><br /><table><tbody><tr><td>Dr. Elder is a coauthor of a book available online (and provided with the course) called "Handbook of Statistical Data Analysis and Data Mining Applications." &nbsp;This book&nbsp;contains a wealth of practical examples and tutorials (most using the Statsoft Statistica software). &nbsp;It has a decidedly practical emphasis that allows you to see how algorithms are used to discern patterns in the data and to evaluate and compare how effective they are with specific data sets. &nbsp;Functional areas covered in the tutorials include aviation safety, movie box office receipts, customer services, credit scoring, automobile brand review, quality control, business administration in a medical industry, psychological evaluation, dentistry and profit analysis. &nbsp;This is very helpful for those who prefer to work from the concrete to the general (rather than being provided mathematical abstractions that you then apply to specific situations). 
&nbsp;They might also be helpful for showing a business user why data mining matters and what value it brings to a business or organization.</td><td><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=0123747651" style="height: 240px; width: 120px;"></iframe></td></tr></tbody></table><br /><table><tbody><tr><td><br /><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=0321321367" style="height: 240px; width: 120px;"></iframe></td><td><br />The conference covered many of the same topics discussed in Introduction to Data Mining by Tan, Steinbach and Kumar. &nbsp;However, there were many more concrete examples and applications of techniques in specific areas of finance, industry, government and education. &nbsp;A section of the book on ensemble methods is included in a larger section simply titled "Classification: Alternative Techniques". &nbsp;Dr. Elder went into greater detail on these topics and demonstrated the effectiveness of combining multiple models into a single model that is usually more accurate than the best of the individual component classifiers. &nbsp;It seems that different classifiers "see" certain parts of data sets better than others, and that combining classifiers results in a final analysis in which the best (most accurate) elements of each classification are retained while the worst aspects are largely ignored. 
&nbsp;By combining classifiers and manipulating the training set and input features a more accurate final model can be obtained.&nbsp;</td></tr></tbody></table><br /><table><tbody><tr><td><br />More detail about ensemble methods is available in another book coauthored by Dr. Elder entitled Ensemble Methods in Data Mining. &nbsp;This book goes into greater detail about how and when to use&nbsp;ensembling&nbsp;and includes some examples in R. &nbsp;The use of multiple classification techniques raises a number of interesting issues - on the one hand they seem to work in practice, but their use makes it more difficult to trace how a final combined model is constructed from the original data set. &nbsp;This has raised some interesting issues about the definition of complexity and the quest for simple accurate models.<br /><br />Dr. Andrew Fast presented on Text Mining and Social Network Analysis - and provided some valuable insights into these rapidly developing fields. &nbsp;There were also a number of software demos and time to interact with other members of Elder Research staff and conference participants.</td> <td><br /><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=1608452840" style="height: 240px; width: 120px;"></iframe></td> </tr></tbody></table><br />The conference took place in Charlottesville VA which is a great setting with many historical and recreational attractions nearby. <br /><br />So that's what I did on my summer vacation...</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/718438522319707387/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/09/elder-research-two-day-course.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/718438522319707387"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/718438522319707387"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/09/elder-research-two-day-course.html" title="Elder Research Two Day Course"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-7052403659735605620</id>
<published>2010-09-07T17:54:00.000-07:00</published>
<updated>2010-09-07T17:54:39.761-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="music"/>
<title type="text">Ah Bach...</title>
<content type="html">As announced by David Smith over at&nbsp;<a href="http://blog.revolutionanalytics.com/2010/09/competition-data-visualization-with-ggplot2.html">Revolution Analytics</a>,&nbsp;&nbsp;a <a href="http://github.com/hadley/ggplot2/wiki/Case-study-competition">ggplot2 Case Study Competition</a> is on...<br /><br /><table><tr><td><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Johann_Sebastian_Bach.jpg/220px-Johann_Sebastian_Bach.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="http://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/Johann_Sebastian_Bach.jpg/220px-Johann_Sebastian_Bach.jpg" /></a></div></td><td>&nbsp; &nbsp;</td><td><br /><br />Rather than blogging for the last few days, I <a href="http://github.com/hadley/ggplot2/wiki/Bach-2-Part-Invention-in-F-Major-BWV779">cobbled together an entry</a>. &nbsp;It is not a particularly mind bending use of ggplot2, but the subject matter is relatively original. &nbsp;It is a brief analysis and visualization of a J.S. Bach 2 Part Invention. &nbsp;And because Bach's music is so well structured, the visualization itself is nice looking and well balanced. &nbsp;Perhaps suitable for geeky tee shirts...<br /></td></tr></table><a href="http://github.com/hadley/ggplot2/wiki/Bach-2-Part-Invention-in-F-Major-BWV779">Check it out</a> when you get a chance.</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/7052403659735605620/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/09/ah-bach.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7052403659735605620"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7052403659735605620"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/09/ah-bach.html" title="Ah Bach..."/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-6836947123869493862</id>
<published>2010-09-02T17:34:00.000-07:00</published>
<updated>2010-09-02T17:34:25.732-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="Data Mining"/>
<title type="text">Bot Botany - K-Means and ggplot2</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><span class="Apple-style-span" style="font-family: Arial; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"><br /></span></span><a href="http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s1600/botbotany.PNG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s400/botbotany.PNG" width="390" /></a></div>So if you had a robot that was an expert at botany - would you have a bot botanist? &nbsp;Among other things, it would need to distinguish flowers through vision and image processing, and be able to classify various kinds of plants based upon specific characteristics. &nbsp;What do both of these requirements have in common? &nbsp; They can be done using the <a href="http://en.wikipedia.org/wiki/K-means_clustering">k-means clustering</a>. &nbsp;<a href="http://en.wikipedia.org/wiki/Image_segmentation">Image segmentation</a>&nbsp;can be used to allow our robot to recognize objects. &nbsp;Based upon petal and sepal size, it could determine say - the species of an iris. &nbsp;The well-known iris data set has been featured in <a href="http://www.r-chart.com/2010/07/thinking-about-graphs.html">other posts</a>. <br /><br /><b>K-Means in R</b><br />If you look up the k-means algorithm online or in a reference book, you will be met with a flurry of mathematical symbols and formal explanations. &nbsp;The basic principle (informally stated) is rather simple... given a set of observations (picture a scatter plot of points), and a number of groups or clusters that you wish to group them in, the k-means algorithm finds the center of each group and associates observations with the groups with the "closest" center.<br /><br />To use k-means in R, call the <b>kmeans </b>function with a matrix of values and the number of centers. 
&nbsp;The function&nbsp;seeks to partition the points into&nbsp;<i>k</i>&nbsp;groups (the number of centers) such that the sum of squares from points to the assigned cluster centers is minimized. &nbsp;Each observation (point) belongs to the cluster with the nearest mean.<br /><br /><b>How-To</b><br />To start, we will copy the iris data set to a separate data frame. &nbsp;Not strictly speaking necessary, but makes it easier for me to reflexively enter df whenever the data frame is in view. &nbsp;Next we create a matrix object containing only the Petal Length and Width.<br /><br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df=iris</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">m=as.matrix(cbind(df$Petal.Length, df$Petal.Width),ncol=2)</span></span><br /><br />Now we will do the actual clustering. <br /><br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">cl=(kmeans(m,3))</span></span><br /><br />Simple eh? &nbsp;The cl object contains a number of interesting attributes associated with the model. <br /><br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">cl$size</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">cl$withinss</span></span><br /><br /><br />Next we do a bit of data formatting and preparation for subsequent calls to graph the data. &nbsp;Notice that we add the cluster information back to our original data frame. 
&nbsp;This is a good organization of the data and also a requirement for working with ggplot2 which is designed to use data frames.<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df$cluster=factor(cl$cluster)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">centers=as.data.frame(cl$centers)</span></span><br /><br />The following graph color codes the points by cluster. &nbsp;We also add the centers and a semi transparent halo around the center to emphasize the place of the center... and its role in classifying the observations into clusters.<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">library(ggplot2)</span></span><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">ggplot(data=df, aes(x=Petal.Length, y=Petal.Width, color=cluster )) +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;geom_point() +&nbsp;</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;geom_point(data=centers, aes(x=V1,y=V2, color='Center')) +</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">&nbsp;geom_point(data=centers, aes(x=V1,y=V2, color='Center'), size=52, alpha=.3, legend=FALSE)</span></span><br /><br /><div class="separator" style="clear: both; text-align: center;"><a 
href="http://2.bp.blogspot.com/_FsLa1cMTCWU/TIAmvNNXSTI/AAAAAAAAAb8/wFveqOH5q8k/s1600/iris_kmeans_cluster.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://2.bp.blogspot.com/_FsLa1cMTCWU/TIAmvNNXSTI/AAAAAAAAAb8/wFveqOH5q8k/s400/iris_kmeans_cluster.png" width="400" /></a></div><br /><br />This plot is an interesting example of how several different sets of data (in this case the actual observations as well as the centers) in separate data frames can be included in a single ggplot2 chart. <br /><br /><b>Misclassified&nbsp;Observations</b><br />The model is pretty accurate, but not perfect. &nbsp;The following SQL statement highlights the few misclassified observations:<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">sqldf('select Species, cluster, count(*) from df group by Species, Cluster')</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><br /></span></span><br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; Species cluster count(*)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">1 &nbsp; &nbsp; setosa &nbsp; &nbsp; &nbsp; 2 &nbsp; &nbsp; &nbsp; 50</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">2 versicolor &nbsp; &nbsp; &nbsp; 1 &nbsp; &nbsp; &nbsp; 48</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">3 versicolor &nbsp; &nbsp; 
&nbsp; 3 &nbsp; &nbsp; &nbsp; &nbsp;2</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">4 &nbsp;virginica &nbsp; &nbsp; &nbsp; 1 &nbsp; &nbsp; &nbsp; &nbsp;6</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">5 &nbsp;virginica &nbsp; &nbsp; &nbsp; 3 &nbsp; &nbsp; &nbsp; 44</span></span><br /><div><br /></div><div>So we grab the outliers into their own data frame....</div><div><br /></div><div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df2 = sqldf('select * from df where (Species || cluster) in&nbsp;</span></span></div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; (select Species || cluster from df group by Species, Cluster having count(*) &lt; 10)')</span></span></div></div><br /><br />Now we can enhance the previous graph to put a diamond around misclassified points.<br /><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">l</span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">ast_plot() + &nbsp;geom_point(data=df2, aes(x=Petal_Length, y=Petal_Width, shape=5, alpha=.7, size=4.5), legend=FALSE)&nbsp;</span></span><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TIAmhPpdihI/AAAAAAAAAb0/P4miPDTDvrU/s1600/iris_kmeans_misclassified.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" 
src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TIAmhPpdihI/AAAAAAAAAb0/P4miPDTDvrU/s400/iris_kmeans_misclassified.png" width="400" /></a></div><br />And so with a bit of Data Mining knowledge and the R programming language, even our machines can stop and smell the roses...</content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/6836947123869493862/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/09/bot-botany-k-means-and-ggplot2.html#comment-form" title="1 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6836947123869493862"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/6836947123869493862"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/09/bot-botany-k-means-and-ggplot2.html" title="Bot Botany - K-Means and ggplot2"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://3.bp.blogspot.com/_FsLa1cMTCWU/TIBByxcNtzI/AAAAAAAAAcE/SyYEDxjHVs4/s72-c/botbotany.PNG" height="72" width="72"/>
<thr:total>1</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-4802408713335086512</id>
<published>2010-08-31T17:51:00.000-07:00</published>
<updated>2010-08-31T17:51:31.701-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="Learning Statistics"/>
<title type="text">Better than Average</title>
<content type="html"><span class="Apple-style-span" style="font-family: Arial; font-size: small;"><span class="Apple-style-span" style="font-size: 13px;"></span></span><br /><span class="Apple-style-span" style="font-family: Arial; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: Arial; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: Arial; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: Arial; font-size: small;"><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s1600/MovingAverage.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s400/MovingAverage.png" width="400" /></a></div><br /><table><tbody><tr><td><br /><br /><iframe frameborder="0" marginheight="0" marginwidth="0" scrolling="no" src="http://rcm.amazon.com/e/cm?lt1=_blank&amp;bc1=000000&amp;IS2=1&amp;bg1=FFFFFF&amp;fc1=000000&amp;lc1=0000FF&amp;t=rcha-20&amp;o=1&amp;p=8&amp;l=as1&amp;m=amazon&amp;f=ifr&amp;md=10FE9736YVPPT7A0FBG2&amp;asins=0387293175" style="height: 240px; width: 120px;"></iframe><br /><br /></td><td><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">The <a href="http://www.nist.gov/index.html">NIST</a>'s&nbsp;<a href="http://www.itl.nist.gov/div898/handbook/index.htm">The Engineering Statistics Handbook</a>&nbsp;includes&nbsp;an&nbsp;<a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc4.htm">Introduction to Time Series Analysis</a>&nbsp;which&nbsp;provides a great way of demonstrating how R can be used to make such calculations. &nbsp;This post replicates the analysis of the data set introduced under&nbsp;<a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc42.htm">Averaging Methods</a>&nbsp;using R. 
<br /><br />As you might expect, Time Series Analysis is a broad subject that has been investigated in depth elsewhere. &nbsp;If you need more information, a book such as <a href="http://www.amazon.com/dp/0387293175?tag=rcha-20&amp;camp=213381&amp;creative=390973&amp;linkCode=as4&amp;creativeASIN=0387293175&amp;adid=1541HN2AYC61FXKB3NDG&amp;">Time Series Analysis and Its Applications</a>&nbsp;provides a much more in depth look at the mathematical theory involved as well as providing practical examples of the use of R for analysis and forecasting.<br /><br />But back to the NIST handbook... the&nbsp;<a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc42.htm">data set they used</a>&nbsp;represents supplier deliveries to a warehouse. &nbsp;The calculations that follow demonstrate how to perform the calculations they do in this section of the handbook using R.</div><br /></td></tr></tbody></table><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><b><u>Supplier Amount(in 1000 of $)</u></b></span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">1</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" 
style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;9</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">2</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;8</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">3</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;9</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" 
style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">4</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;12</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">5</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;9</span></span><span class="Apple-tab-span" 
style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">6</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;12</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">7</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, 
monospace;">&nbsp;11</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">8</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;7</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">9</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;13</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">10</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;9</span></span></div><div style="margin-bottom: 0px; 
margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">11</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;11</span></span></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">12</span></span><span class="Apple-tab-span" style="white-space: pre;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"> </span></span></span><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">&nbsp;10</span></span></div><br /><br /><br /><b>Simple Average (Mean)</b><br />In R the series can be represented as a vector.<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">v=c(9,8,9,12,9,12,11,7,13,9,11,10)</span></span><br /><br />The average of the series is 10.<br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">mean(v)&nbsp;</span></span><br /><br />The "error" amount that each entry in the vector differs from the mean can be calcuated as follows.<br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span 
class="Apple-style-span" style="font-size: small;">v - mean(v)&nbsp;</span></span><br /><br />This value can serve as the basis for a measure to ascertain how well a model fits (Error Squared).<br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">(v - mean(v))^2&nbsp;</span></span><br /><br />Finally, the sum or mean of these results can be used to compute values that represent the overall fit (or amount of error) for the estimate.<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">sum((v - mean(v))^2) # "SSE" is the sum of the squared errors.</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">mean((v - mean(v))^2) # "MSE" is the mean of the squared errors.</span></span><br /><br />Now that we have simple values that indicate how good an estimate for a set is, we can test with other values. 
<br />Rather than writing out an entire calculation each time, we can create a function in R and apply the function to each value in a vector.<br /><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">sse = function(x, series){sum((series - x)^2)}</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">mse = function(x, series){mean((series - x)^2)}</span></span><br /><br />To compare the estimate (10) with 7, 9, and 12.<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">sapply(c(7,9,10,12),sse,v)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">sapply(c(7,9,10,12),mse,v)</span></span><br /><br /><b>Analyzing Time Series Data</b><br />A <a href="http://en.wikipedia.org/wiki/Time_series">time series</a> is simply a sequence of data points in time. &nbsp;Time series data has unique characteristics which allow it&nbsp;to be processed in a similar manner regardless of the underlying data represented. &nbsp;Many disciplines deal with this type of data including&nbsp;statistics, signal processing, econometrics and mathematical finance. &nbsp;Such data appears in business in relationship to sales forecasting, budgetary analysis, yield projections, and in the process / quality control arena.&nbsp;In other blog entries, they are used in relation to <a href="http://www.r-chart.com/2010/06/stock-analysis-using-r.html">stock market&nbsp;analysis</a> and <a href="http://www.r-chart.com/2010/06/world-bank-api-r-package-available.html">economic data</a>. &nbsp;They are relevant to web sites and are available through tools like Google Analytics. 
<br /><br />So time series data is widely applicable but has common features regardless of its application. &nbsp;It can be <i>analyzed</i> to identify its characteristics and patterns. &nbsp;This often leads to <i>forecasting </i>in which a model is used to<br />predict future events based upon past data.<br /><br />All time series data has the following common qualities:<br /><br /><ul><li>a natural temporal ordering</li><li>events that are close together are generally more closely related than those further apart</li><li>in most cases, past values are assumed to influence future values (rather than the other way around)</li><li>usually spaced at uniform intervals</li></ul><br />The data set we are working with is a bit odd to consider as a time series - a supplier is not a unit of time. &nbsp;However, it is useful for making the point that a &nbsp;"simple" average (or mean)&nbsp;of all past observations is only a useful estimate when there are no trends. &nbsp;Not sure what to make of this. &nbsp;I emailed the government and asked for clarification. &nbsp;Will post the answer here if I receive a response.<br /><br /><br /><br /><br /><div><br /></div><br />In R, a vector can be cast to a time series object as follows:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">s=as.ts(c(9,8,9,12,9,12,11,7,13,9,11,10))</span>&nbsp;</span><br /><br /><b>Moving Average</b><br />A <a href="http://en.wikipedia.org/wiki/Moving_average">moving average</a> is described in the <a href="http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc421.htm">NIST Handbook</a>&nbsp;and is also referred to as "smoothing" - a term that comes up in ggplot2 (geom_smooth). &nbsp;There are a myriad of functions available in R that involve some sort of lagged calculation of a series of numbers. 
&nbsp;A simple example that almost does the trick involves rollapply:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">rollapply(s, 3, mean)</span></span><br /><br />This works, but it is not clear that the first two entries were skipped. &nbsp;Better to use a library that has additional checks coded in...<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">library(TTR)</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">SMA(s,3)</span></span><br /><br />If you take a&nbsp;look at the code inside... you can get an idea of the additional verification and error checking (which accounts for missing values at the beginning of the list). &nbsp;To view the source, simply input the function name without any parenthesis:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">SMA</span></span><br /><br />You can drill down into the internally called methods in this case:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">runMean</span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">runSum</span></span><br /><br />With this method available, we can calculate the Error and the Error Squared:<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">s - SMA(s,3) &nbsp; &nbsp; &nbsp;# Error</span></span><br /><span class="Apple-style-span" style="font-family: 
'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">(s - SMA(s,3))^2 &nbsp;# Error Squared</span></span><br /><br />Note that the calculated mean replaced missing entries as zeroes...<br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">x=((s - SMA(s,3))^2)</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">x[ is.na(x) ] &lt;- 0</span></span><br /><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">mean(x)</span></span><br /><br /><div><br /></div><div>Oh - in case you were interested in the plot:</div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">library(ggplot2)</span></div><div><div><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;">df = as.data.frame(as.ts(v))</span></span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;">df$idx = as.numeric(rownames(df))</span></div><div><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace; font-size: small;"><div>df$x= as.numeric(df$x)</div><div>qplot(data=df, idx, x) + geom_line() + geom_smooth()</div><div><br /></div><div><span class="Apple-style-span" style="font-family: Arial;"><span 
class="Apple-style-span" style="font-size: medium;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><br /></span></span></span></span></div></span></div></div></span></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/4802408713335086512/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/08/better-than-average.html#comment-form" title="0 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/4802408713335086512"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/4802408713335086512"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/08/better-than-average.html" title="Better than Average"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_FsLa1cMTCWU/TH2hTwRF3pI/AAAAAAAAAbs/LqE0yGJ3F6c/s72-c/MovingAverage.png" height="72" width="72"/>
<thr:total>0</thr:total>
</entry>
<entry>
<id>tag:blogger.com,1999:blog-3867310391951630980.post-7919296186803098682</id>
<published>2010-08-27T15:21:00.000-07:00</published>
<updated>2010-08-27T15:21:38.912-07:00</updated>
<category scheme="http://www.blogger.com/atom/ns#" term="ggplot2"/>
<category scheme="http://www.blogger.com/atom/ns#" term="fractals"/>
<title type="text">Fractals in R</title>
<content type="html"><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s1600/mandelbrot1.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="357" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s400/mandelbrot1.png" width="400" /></a></div><br /><br /><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><a href="http://users.utu.fi/attenka/">Atte Tenkanen</a>&nbsp;had&nbsp;<a href="http://fractalswithr.blogspot.com/">a blog on fractals</a>&nbsp;using R for a time.&nbsp;Much of his source code is still available online. &nbsp;To produce his version of the Mandelbrot set:</div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><br /></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">source('<a href="http://users.utu.fi/attenka/mandelbrot_set.R">http://users.utu.fi/attenka/mandelbrot_set.R</a>')</span></span></div><br /><br /><a href="http://en.wikipedia.org/wiki/Fractal">Fractals</a>&nbsp;(such as the&nbsp;<a href="http://en.wikipedia.org/wiki/Mandelbrot_set">Mandelbrot Set</a>&nbsp;pictured above) &nbsp;are objects that display self-similarity on all scales. &nbsp;Fractals are mathematical concepts with practical applications. For example, fractal dimensions provide a solution to measuring a coastline - where you come up with different lengths depending upon the length of the ruler you use. (the <a href="http://en.wikipedia.org/wiki/Coastline_paradox">Coastline Paradox</a>).<br /><br />They are also pretty and fun to look at....<br /><br />Back in the 80's my little brother and I would type fractal equations into a Radio Shack Color Computer. 
&nbsp;We would spend half a day typing in a program and debugging it and the other half a day waiting for the image to render. &nbsp;He later improved the process by using a Commodore Amiga and more than two fingers for typing.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4xbToDJI/AAAAAAAAAbc/FGTObobpj_c/s1600/mandelbrot_ggplot2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="400" src="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4xbToDJI/AAAAAAAAAbc/FGTObobpj_c/s400/mandelbrot_ggplot2.png" width="400" /></a></div>You can also do this type of plot using ggplot2 - and most of the effort is related to turning off axes and legends. &nbsp;The source is on github and can be run from there.<br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;">source('<a href="http://github.com/ezgraphs/R-Programs/raw/master/mandelbrot.R">http://github.com/ezgraphs/R-Programs/raw/master/mandelbrot.R</a>')</span></span><br /><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><span class="Apple-style-span" style="font-size: medium;">So now I can download source code from somewhere out on the interweb thingy and use open source software to render these images in a matter of seconds. 
&nbsp;</span></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><span class="Apple-style-span" style="font-size: medium;"><br /></span></span></span></span><br /><span class="Apple-style-span" style="font-family: 'Courier New', Courier, monospace;"><span class="Apple-style-span" style="font-size: small;"><span class="Apple-style-span" style="font-family: 'Times New Roman';"><span class="Apple-style-span" style="font-size: medium;">Just like we used to do when we were kids....</span></span></span></span></content>
<link rel="replies" type="application/atom+xml" href="http://www.r-chart.com/feeds/7919296186803098682/comments/default" title="Post Comments"/>
<link rel="replies" type="text/html" href="http://www.r-chart.com/2010/08/fractals-in-r.html#comment-form" title="2 Comments"/>
<link rel="edit" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7919296186803098682"/>
<link rel="self" type="application/atom+xml" href="http://www.blogger.com/feeds/3867310391951630980/posts/default/7919296186803098682"/>
<link rel="alternate" type="text/html" href="http://www.r-chart.com/2010/08/fractals-in-r.html" title="Fractals in R"/>
<author>
<name>C</name>
<uri>http://www.blogger.com/profile/02893688387818336028</uri>
<email>[email protected]</email>
<gd:image rel="http://schemas.google.com/g/2005#thumbnail" width="16" height="16" src="http://img2.blogblog.com/img/b16-rounded.gif"/>
</author>
<media:thumbnail xmlns:media="http://search.yahoo.com/mrss/" url="http://4.bp.blogspot.com/_FsLa1cMTCWU/THg4nXR4RCI/AAAAAAAAAbU/N6EoNqOAPrQ/s72-c/mandelbrot1.png" height="72" width="72"/>
<thr:total>2</thr:total>
</entry>
</feed>