diff --git a/datavis-survey.html b/datavis-survey.html index 1994695..940377d 100644 --- a/datavis-survey.html +++ b/datavis-survey.html @@ -430,6 +430,6 @@

Survey Analysis Example

diff --git a/enjalot-tweets.html b/enjalot-tweets.html index 60ac0ca..c0c2d65 100644 --- a/enjalot-tweets.html +++ b/enjalot-tweets.html @@ -17,10 +17,10 @@ + - @@ -39,13 +39,13 @@ + - @@ -79,6 +79,12 @@ registerFile("./data/enjalot-tweets/scopes-001-input.parquet", {"name":"./data/enjalot-tweets/scopes-001-input.parquet","path":"./_file/data/enjalot-tweets/scopes-001-input.7f644919.parquet","lastModified":1714776460422}); registerFile("./data/enjalot-tweets/scopes-001.json", {"name":"./data/enjalot-tweets/scopes-001.json","mimeType":"application/json","path":"./_file/data/enjalot-tweets/scopes-001.fe2ed940.json","lastModified":1714776460399}); +define({id: "755f6a01", inline: true, inputs: ["max","da","min","display"], body: async (max,da,min,display) => { +display(await( +max(da, d => d.year) - min(da, d => d.year) +)) +}}); + define({id: "aa68ff38", inputs: ["Plot","hulls","da","display"], body: async (Plot,hulls,da,display) => { display(await( Plot.plot({ @@ -114,16 +120,80 @@ )) }}); -define({id: "92fb4788", inputs: ["view","Inputs","scope"], outputs: ["selcluster"], body: (view,Inputs,scope) => { -const selcluster = view(Inputs.select(scope.cluster_labels_lookup, { value: d => d.cluster, format: x => x.cluster + ": " + x.label, label: "Cluster:"})) -return {selcluster}; +define({id: "8ff0b1db", inline: true, inputs: ["clusterTableData","display"], body: async (clusterTableData,display) => { +display(await( +clusterTableData.length +)) +}}); + +define({id: "c133b744", inline: true, inputs: ["da","display"], body: async (da,display) => { +display(await( +da.length +)) +}}); + +define({id: "85389ac9", inline: true, inputs: ["Inputs","da","tableConfig","display"], body: async (Inputs,da,tableConfig,display) => { +display(await( +Inputs.table(da, {...tableConfig, columns: [ + "cluster", + "label", + "full_text", + "favorite_count", + "retweet_count", + "tweet_id" + ], + width: { + ...tableConfig.width, + full_text: "40%", + tweet_id: 70, + cluster: 50, + label: 
200 + } +}) +)) +}}); + +define({id: "8ff0b1db-1", inline: true, inputs: ["clusterTableData","display"], body: async (clusterTableData,display) => { +display(await( +clusterTableData.length +)) }}); -define({id: "e83054f4", inline: true, inputs: ["clusterCard","selcluster","barPlot","tableConfig","da","scope","hulls","display"], body: async (clusterCard,selcluster,barPlot,tableConfig,da,scope,hulls,display) => { +define({id: "ec6d2531", inputs: ["scope","da","sum","min","max","view","Inputs","sparkbar"], outputs: ["clusterTableData","selclusterTable"], body: (scope,da,sum,min,max,view,Inputs,sparkbar) => { +const clusterTableData = scope.cluster_labels_lookup.map(c => { + let dc = da.filter(d => d.cluster == c.cluster) + return { + cluster: c.cluster, + label: c.label, + count: dc.length, + favorites: sum(dc, d => d.favorite_count), + retweets: sum(dc, d => d.retweet_count), + min_date: min(dc, d => d.created_at), + max_date: max(dc, d => d.created_at), + } +}) +const selclusterTable = view(Inputs.table(clusterTableData, { + format: { + "favorites": sparkbar(max(clusterTableData, d => d.favorites), "lightblue"), + "retweets": sparkbar(max(clusterTableData, d => d.retweets), "orange"), + }, + width: { + "cluster": 50, + "label": "20%" + }, + sort: "favorites", + reverse: true, + multiple: false, + value: clusterTableData[62] +})) +return {clusterTableData,selclusterTable}; +}}); + +define({id: "a2fdee09", inline: true, inputs: ["clusterCard","selclusterTable","barPlot","tableConfig","da","scope","hulls","display"], body: async (clusterCard,selclusterTable,barPlot,tableConfig,da,scope,hulls,display) => { display(await( -clusterCard(selcluster.cluster, { +clusterCard(selclusterTable.cluster, { description: "", - plot: barPlot(selcluster.cluster, { field: "year"}), + plot: barPlot(selclusterTable.cluster), tableConfig, da, scope, @@ -136,17 +206,28 @@ // ---------------------------------------------------------- }}); -define({id: "7b0ca756", outputs: ["tableConfig"], 
body: () => { +define({id: "6933f8ba", inputs: ["htl"], outputs: ["tableConfig"], body: (htl) => { const tableConfig = { columns: [ "full_text", "created_at", - "type", + "favorite_count", + "retweet_count", + "tweet_id", ], + header: { + "tweet_id": "link" + }, + format: { + "tweet_id": x => htl.html`tweet` + }, width: { - "full_text": "60%" + "full_text": "60%", + "favorite_count": 50, + "retweet_count": 50, + "tweet_id": 60 }, - sort: "created_at", + sort: "favorite_count", reverse: true, rows: 15 } @@ -214,6 +295,21 @@ return {barPlot}; }}); +define({id: "5d395032", inputs: ["htl"], outputs: ["sparkbar"], body: (htl) => { +function sparkbar(max, color) { + return x => htl.html`
${x.toLocaleString("en")}` +} +return {sparkbar}; +}}); + define({id: "77f718ba", outputs: ["canvas"], body: () => { const canvas = document.createElement("canvas") return {canvas}; @@ -248,9 +344,9 @@ return {scope}; }}); -define({id: "bee957c8", inputs: ["db"], outputs: ["data"], body: (db) => { +define({id: "74e9b455", inputs: ["db"], outputs: ["data"], body: (db) => { // const rows = db.sql`SELECT * FROM input` -const data = db.sql`SELECT * FROM scope` +const data = db.sql`SELECT *,id::string AS tweet_id FROM scope` return {data}; }}); @@ -265,11 +361,11 @@ console.log("hulls!", hulls) }}); -define({id: "8f058b05", outputs: ["scatter","hull","tooltip","clusterCard","markdownit"], body: async () => { -const [{scatter}, {hull}, {tooltip}, {clusterCard}, {default: markdownit}] = await Promise.all([import("./_import/components/scatter.e6a504c8.js"), import("./_import/components/hull.532abe46.js"), import("./_import/components/tooltip.c593267a.js"), import("./_import/components/clusterCard.bd6879f9.js"), import("./_node/markdown-it@14.0.0/index.js")]); +define({id: "4f5bc22b", outputs: ["scatter","hull","tooltip","clusterCard","markdownit","min","max","sum"], body: async () => { +const [{scatter}, {hull}, {tooltip}, {clusterCard}, {default: markdownit}, {min, max, sum}] = await Promise.all([import("./_import/components/scatter.e6a504c8.js"), import("./_import/components/hull.532abe46.js"), import("./_import/components/tooltip.c593267a.js"), import("./_import/components/clusterCard.bd6879f9.js"), import("./_node/markdown-it@14.0.0/index.js"), import("./_npm/d3-array@3.2.4/_esm.js")]); -// import matter from "npm:gray-matter"; -return {scatter,hull,tooltip,clusterCard,markdownit}; + +return {scatter,hull,tooltip,clusterCard,markdownit,min,max,sum}; }}); define({id: "2af7ab58", inputs: ["markdownit"], outputs: ["Markdown","md"], body: (markdownit) => { @@ -354,36 +450,58 @@
-

enjalot's tweets

-

Looking in the mirror of 10,000 tweets

+

Looking in the mirror of years and 10,965 tweets.

+

Oh god this could get embarrassing. +The problem with powerful visualization tools is that they will show you things whether you wanted to see them or not. +For most of my career I've used Twitter mainly in a professional capacity, but as we will see my younger self was sometimes a bit flippant.

+

Social media is of course a place where we run into unmanageable amounts of unstructured text data. +As we've seen in the Datavis Survey and GitHub Issues analyses, we can use Latent Scope to pull some structure out of the text and combine it with whatever interesting metadata we already have.

+

I don't expect you to care about, let alone read, all of my tweets, so this time we'll jump straight into the map and start exploring clusters. +Hopefully displaying my shame will serve as an example of the kinds of insights one might gain from clustering and visualizing their own textual data.

+

If you still want to analyze your own tweets by the end of this, you can follow the instructions in this notebook to download and then process your tweets into a format that matches this analysis.

-
+

Each of the dots in the above map is a tweet, and all those tweets went through the 4-step process in Latent Scope:

+
    +
  1. Embed - run each piece of text through an embedding model
  2. +
  3. Project - run the high-dimensional embeddings through UMAP
  4. +
  5. Cluster - run the 2-dimensional UMAP coordinates through HDBSCAN
  6. +
  7. Label - ask an LLM to create a label by summarizing a list of text taken from each cluster
  8. +
+

So at the end of this process we have clusters carving up our tweets. +Every row of our input data is annotated with a cluster index and label:

+
+ +
+

clusters is better than 10,000 but it's still quite a lot. +Let's use some common metrics to explore our clusters, namely likes and retweets.

+

Click on the radio button on the left of each cluster to select it and see the details in the card below.

+
- +
-
+
+
-
+
-
+
diff --git a/explore-and-curate.html b/explore-and-curate.html index d5323cf..7301189 100644 --- a/explore-and-curate.html +++ b/explore-and-curate.html @@ -141,6 +141,6 @@

-
Built with Observable on May 6, 2024.
+
Built with Observable on May 6, 2024.
diff --git a/exporting-data.html b/exporting-data.html index 855a2ad..04b886b 100644 --- a/exporting-data.html +++ b/exporting-data.html @@ -163,6 +163,6 @@

T diff --git a/index.html b/index.html index 5b9df00..331fb7c 100644 --- a/index.html +++ b/index.html @@ -116,6 +116,6 @@

diff --git a/install-and-config.html b/install-and-config.html index 3b6c009..60cc6fc 100644 --- a/install-and-config.html +++ b/install-and-config.html @@ -123,6 +123,6 @@

-
Built with Observable on May 6, 2024.
+
Built with Observable on May 6, 2024.
diff --git a/plot-issues.html b/plot-issues.html index 2efca07..9f3fbf2 100644 --- a/plot-issues.html +++ b/plot-issues.html @@ -572,6 +572,6 @@

-
Built with Observable on May 6, 2024.
+
Built with Observable on May 6, 2024.
diff --git a/us-federal-laws.html b/us-federal-laws.html index fb4ea32..beeebba 100644 --- a/us-federal-laws.html +++ b/us-federal-laws.html @@ -597,6 +597,6 @@

-
Built with Observable on May 6, 2024.
+
Built with Observable on May 6, 2024.
diff --git a/your-first-scope.html b/your-first-scope.html index 7b8dfa3..23b81b5 100644 --- a/your-first-scope.html +++ b/your-first-scope.html @@ -155,6 +155,6 @@

-
Built with Observable on May 6, 2024.
+
Built with Observable on May 6, 2024.