Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
staeiou authored Jan 29, 2024
1 parent 4e5c25c commit 4ad3e43
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 27 deletions.
59 changes: 44 additions & 15 deletions assets/wos/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Web of Science Outgoing Citation Analyzer</title>
<title>Web of Science Citation Analyzer</title>
<style>
.table-container {
margin-top: 20px;
Expand All @@ -24,10 +24,13 @@
border: 1px solid black;
}

th, td {
padding: 8px;
text-align: left;
}
th, td {
word-wrap: break-word; /* Ensures text wraps in cells */
overflow: hidden; /* Hide overflowed content */
max-width: 200px; /* Set a maximum width for each cell */
padding: 8px;
text-align: left;
}

th {
background-color: #f2f2f2;
Expand All @@ -42,12 +45,13 @@
text-decoration: none;
color: blue;
}

</style>

</head>
<body>
<h1>Web of Science Outgoing Citation Analyzer</h1>
<p> In Web of Science, it is easy to analyze incoming citations, or citations *TO* a set of articles found in any search query. It is much harder to analyze outgoing citations, or citations *FROM* a set of articles found in any search query. This site lets you upload exported Plain Text results and analyzes the most common citations those articles make. See <a href="https://ucsd.zoom.us/rec/share/Zl1EP5h3wRbdv9lElCS0AwYZuVUGP0zAjzCq_TtEE0b_dnQIzPyPuTO9rmcoQ1j5.P8PVC9vge56r8ziD?startTime=1705972024000">demo video here</a>.</p>
<h1>Web of Science Citation Analyzer</h1>

<input type="file" id="fileInput" multiple accept=".txt">
<button id="processButton">Process Files</button>
<button id="resetButton">Reset</button> <!-- Reset button -->
Expand All @@ -59,26 +63,51 @@ <h1>Web of Science Outgoing Citation Analyzer</h1>
<progress id="fileProgress" value="0" max="100"></progress>
</div>
<div id="navigation">
<a href="#authorsTable">Most Cited First Authors</a>
<a href="#authorsTable">Most Cited Authors</a>
<a href="#titlesTable">Most Cited Titles</a>
<a href="#publicationsTable">Most Cited Publications</a>
<a href="#keywordsTable">Most Common Author-Supplied Keywords</a>
<a href="#oneGramTable">Title & Abstract: Most Common 1-Grams</a>
<a href="#twoGramTable">Title & Abstract: Most Common 2-Grams</a>
<a href="#threeGramTable">Title & Abstract: Most Common 3-Grams</a>
</div>

<h2>Most Cited Authors</h2>
<div class="table-container" id="authorsContainer">
<h2>Most Cited First Authors (WoS exports only include first author)</h2>

<table id="authorsTable"></table>
</div>


<h2>Most Cited Titles</h2>
<div class="table-container" id="titlesContainer">
<h2>Most Cited Titles</h2>

<table id="titlesTable"></table>
</div>

<div class="table-container" id="publicationsContainer">
<h2>Most Cited Publications</h2>
<div class="table-container" id="publicationsContainer">

<table id="publicationsTable"></table>
</div>
<p>By R. Stuart Geiger, (C) 2024. Freely licensed under <a href="https://www.gnu.org/licenses/agpl-3.0.en.html">the AGPL v3.0</a>. Published on <a href="https://github.com/staeiou/staeiou.github.io/tree/master/assets/wos">GitHub</a>.</p>

<h2>Most Common Author-Supplied Keywords</h2>

<div class="table-container" id="keywordsContainer">
<table id="keywordsTable"></table>
</div>
<h2>Title & Abstract: Most Common 1-Grams (words)</h2>

<div class="table-container" id="oneGramContainer">
<table id="oneGramTable"></table>
</div>
<h2>Title & Abstract: Most Common 2-Grams</h2>

<div class="table-container" id="twoGramContainer">
<table id="twoGramTable"></table>
</div>
<h2>Title & Abstract: Most Common 3-Grams</h2>

<div class="table-container" id="threeGramContainer">
<table id="threeGramTable"></table>
</div>
<script src="script.js"></script>
</body>
</html>
137 changes: 125 additions & 12 deletions assets/wos/script.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,60 @@
// Stop words that disqualify an n-gram from being counted.
// Every entry must be lowercase: generateNGrams() only looks words up after
// cleanText() has lowercased the text, so an uppercase entry (such as 'I')
// can never match anything.
const commonWords = new Set([
  'the', 'of', 'and', 'a', 'to', 'in', 'is', 'you', 'that', 'it',
  'he', 'was', 'for', 'on', 'are', 'as', 'with', 'his', 'they', 'i',
  'at', 'be', 'this', 'have', 'from', 'or', 'one', 'had', 'by', 'word',
  'but', 'not', 'what', 'all', 'were', 'we', 'when', 'your', 'can', 'said',
  'there', 'use', 'an', 'each', 'which', 'she', 'do', 'how', 'their', 'if',
  'will', 'up', 'about', 'out', 'many', 'then', 'them', 'these', 'so', 'some',
  'her', 'would', 'make', 'like', 'him', 'into', 'time', 'has', 'more', 'go',
  'see', 'no', 'way', 'could', 'my', 'than', 'first', 'been', 'call', 'who',
  'its', 'now', 'find', 'long', 'down', 'did', 'get', 'come', 'made', 'may',
  'part', 'through',
]);


/**
 * Count author-supplied keywords (the "DE" field) in a Web of Science
 * plain-text export.
 *
 * The text is lowercased first, so keywords are counted case-insensitively.
 * A DE field may be wrapped over several physical lines; continuation lines
 * start with whitespace and are folded back into the field before it is
 * split on ';', so a keyword broken across two lines is counted once, whole.
 *
 * @param {string} text - Raw contents of one export file.
 * @returns {Object<string, number>} Plain object mapping each lowercased,
 *   trimmed keyword to the number of times it occurs.
 */
function extractKeywords(text) {
  // A Map keeps the tally safe for keywords such as "constructor" that
  // would otherwise read an inherited Object.prototype member.
  const counts = new Map();

  // Split one complete DE field into keywords and add them to the tally.
  const tally = (fieldText) => {
    for (const rawKeyword of fieldText.split(';')) {
      const keyword = rawKeyword.trim();
      if (keyword) {
        counts.set(keyword, (counts.get(keyword) || 0) + 1);
      }
    }
  };

  // Text of the DE field currently being assembled, or null when the
  // current line is not inside a DE field.
  let pendingField = null;

  for (const line of text.toLowerCase().split(/\r?\n/)) {
    if (line.startsWith('de ')) {
      if (pendingField !== null) {
        tally(pendingField);
      }
      pendingField = line.substring(3);
    } else if (pendingField !== null && /^\s+\S/.test(line)) {
      // Indented, non-blank line: continuation of the DE field.
      pendingField += ` ${line.trim()}`;
    } else if (pendingField !== null) {
      // Any other line (new field tag or blank line) ends the field.
      tally(pendingField);
      pendingField = null;
    }
  }
  if (pendingField !== null) {
    tally(pendingField);
  }

  return Object.fromEntries(counts);
}

/**
 * Normalize text for n-gram counting.
 *
 * Lowercases the input, then deletes (does not replace) every character that
 * is not an ASCII letter, a digit, or whitespace. Because characters are
 * deleted, hyphenated terms collapse: "state-of-the-art" -> "stateoftheart".
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The lowercased text with all other characters removed.
 */
function cleanText(text) {
  const lowered = text.toLowerCase();
  const stripped = lowered.replace(/[^a-z0-9\s]/gi, '');
  return stripped;
}
/**
 * Count the n-grams (runs of n consecutive words) in a piece of text.
 *
 * The text is normalized with cleanText() and split on whitespace. Empty
 * tokens, which split() produces when the text starts or ends with
 * whitespace, are discarded so they cannot form bogus n-grams. Any n-gram
 * containing at least one word from commonWords is skipped.
 *
 * @param {string} text - Text to analyze.
 * @param {number} n - Number of words per n-gram; must be a positive integer.
 * @returns {Object<string, number>} Plain object mapping each n-gram (words
 *   joined by a single space) to its count. Empty when n is not a positive
 *   integer or the text has fewer than n words.
 */
function generateNGrams(text, n) {
  if (!Number.isInteger(n) || n < 1) {
    return {};
  }

  const words = cleanText(text).split(/\s+/).filter(Boolean);

  // A Map keeps the tally safe for words such as "constructor" that would
  // otherwise read an inherited Object.prototype member.
  const counts = new Map();
  for (let start = 0; start + n <= words.length; start++) {
    const windowWords = words.slice(start, start + n);
    if (windowWords.some((word) => commonWords.has(word))) {
      continue;
    }
    const ngram = windowWords.join(' ');
    counts.set(ngram, (counts.get(ngram) || 0) + 1);
  }
  return Object.fromEntries(counts);
}

/**
 * Build 1-, 2- and 3-gram counts from the titles ("TI") and abstracts ("AB")
 * of a Web of Science plain-text export.
 *
 * A TI or AB field may be wrapped over several physical lines; continuation
 * lines start with whitespace and are included, so the whole field is
 * analyzed rather than only its first line. All collected fields are joined
 * into one string before counting, so an n-gram can span the boundary
 * between two fields.
 *
 * @param {string} text - Raw contents of one or more export files.
 * @returns {{'1-gram': Object, '2-gram': Object, '3-gram': Object}} The
 *   result of generateNGrams() for n = 1, 2 and 3.
 */
function extractNGrams(text) {
  const fragments = [];
  // True while the current line belongs to a TI or AB field.
  let inTitleOrAbstract = false;

  for (const line of text.split(/\r?\n/)) {
    if (line.startsWith('TI ') || line.startsWith('AB ')) {
      inTitleOrAbstract = true;
      fragments.push(line.substring(3));
    } else if (inTitleOrAbstract && /^\s+\S/.test(line)) {
      // Indented, non-blank line: continuation of the TI/AB field.
      fragments.push(line.trim());
    } else {
      // Any other field tag or a blank line ends the current field.
      inTitleOrAbstract = false;
    }
  }

  const allText = fragments.join(' ');

  return {
    '1-gram': generateNGrams(allText, 1),
    '2-gram': generateNGrams(allText, 2),
    '3-gram': generateNGrams(allText, 3),
  };
}

/**
 * Rank the entries of a counts object from highest to lowest count and keep
 * only the leading ones.
 *
 * @param {Object<string, number>} counts - Maps an item to its count.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {Array<[string, number]>} [item, count] pairs in descending
 *   count order, at most `limit` of them.
 */
function sortAndLimitCounts(counts, limit) {
  const pairs = Object.entries(counts);
  pairs.sort(([, countA], [, countB]) => countB - countA);
  return pairs.slice(0, limit);
}

function normalizeAuthorName(name) {
// Remove punctuation and trim whitespace
let normalized = name.replace(/[.,]/g, '').trim();
Expand Down Expand Up @@ -107,6 +164,41 @@ function populateTable(tableId, data) {
}
}

/**
 * Render every n-gram category into a single three-column table
 * (N-Gram, Item, Count).
 *
 * @param {string} tableId - id of the <table> element to fill.
 * @param {Object} data - Maps an n-gram category label (e.g. '1-gram') to an
 *   iterable of [item, count] pairs.
 */
function populateNGramsTable(tableId, data) {
  const tableEl = document.getElementById(tableId);
  tableEl.innerHTML = ''; // Drop whatever was rendered previously

  // Header row
  const headRow = tableEl.createTHead().insertRow();
  ['N-Gram', 'Item', 'Count'].forEach((label, columnIndex) => {
    headRow.insertCell(columnIndex).textContent = label;
  });

  // One body row per (category, item) pair
  const body = tableEl.createTBody();
  Object.entries(data).forEach(([ngramType, ngramData]) => {
    for (const [item, count] of ngramData) {
      const bodyRow = body.insertRow();
      [ngramType, item, count].forEach((value, columnIndex) => {
        bodyRow.insertCell(columnIndex).textContent = value;
      });
    }
  });
}

/**
 * Render one list of [item, count] pairs into a two-column table.
 *
 * @param {string} tableId - id of the <table> element to fill.
 * @param {Iterable<[string, number]>} data - Rows to display, in order.
 */
function populateNGramTable(tableId, data) {
  const tableEl = document.getElementById(tableId);
  // Assigning innerHTML replaces any earlier rows with just the header row.
  tableEl.innerHTML = '<tr><th>Item</th><th>Count</th></tr>';

  for (const [item, count] of data) {
    const bodyRow = tableEl.insertRow();
    [item, count].forEach((value, columnIndex) => {
      bodyRow.insertCell(columnIndex).textContent = value;
    });
  }
}

function updateProgressBar(percentage) {
const progressBar = document.getElementById('fileProgress');
const progressContainer = document.getElementById('progressContainer');
Expand All @@ -123,30 +215,37 @@ function processFiles() {
let files = document.getElementById('fileInput').files;
if (files.length === 0) return;

updateProgressBar(0); // Initialize progress bar
updateProgressBar(0);
let allReferences = [];
let allTextForNGrams = '';
let keywordCounts = {};
let filesProcessed = 0;
let isValidFile = true; // Flag to track if files are valid
let isValidFile = true;

Array.from(files).forEach(file => {
let reader = new FileReader();
reader.onload = function(e) {
// Check if the file content starts with the required header
if (!e.target.result.startsWith("FN Clarivate Analytics Web of Science")) {
alert("Error: File '" + file.name + "' does not appear to be a Web of Science Plain Text export. The text file should begin with 'FN Clarivate Analytics Web of Science'");
alert("Error: File '" + file.name + "' does not appear to be a Web of Science Plain Text export.");
isValidFile = false;
updateProgressBar(100); // Reset progress bar
return; // Stop processing this file
updateProgressBar(100);
return;
}

// If the file is valid, process its content
if (isValidFile) {
let references = parseCitedReferences(e.target.result);
let fileText = e.target.result;
let references = parseCitedReferences(fileText);
allReferences.push(...references);

let fileKeywords = extractKeywords(fileText);
for (const [key, value] of Object.entries(fileKeywords)) {
keywordCounts[key] = (keywordCounts[key] || 0) + value;
}
allTextForNGrams += ' ' + fileText;

filesProcessed++;
}

// Update the progress bar and process citations when all files are processed
if (filesProcessed === files.length && isValidFile) {
let authorCounts = countCitations(allReferences, 'author');
let titleCounts = countCitations(allReferences, 'title');
Expand All @@ -160,22 +259,36 @@ function processFiles() {
let sortedTitles = sortAndConvertCounts(titleCounts);
let sortedPublications = sortAndConvertCounts(publicationCounts);

let sortedKeywords = sortAndConvertCounts(keywordCounts);
//let ngramCounts = extractNGrams(allTextForNGrams);
//let sortedNGrams = Object.fromEntries(Object.entries(ngramCounts).map(([n, counts]) => [n, sortAndLimitCounts(counts, 50)]));

let ngramCounts = extractNGrams(allTextForNGrams);

let sorted1Grams = sortAndLimitCounts(ngramCounts['1-gram'], 50);
let sorted2Grams = sortAndLimitCounts(ngramCounts['2-gram'], 50);
let sorted3Grams = sortAndLimitCounts(ngramCounts['3-gram'], 50);

populateNGramTable('oneGramTable', sorted1Grams);
populateNGramTable('twoGramTable', sorted2Grams);
populateNGramTable('threeGramTable', sorted3Grams);

populateTable('authorsTable', sortedAuthors);
populateTable('titlesTable', sortedTitles);
populateTable('publicationsTable', sortedPublications);
populateTable('keywordsTable', sortedKeywords);
//populateNGramsTable('ngramsTable', sortedNGrams);
}

updateProgressBar((filesProcessed / files.length) * 100);
};
reader.onerror = () => {
alert("Error reading file: " + file.name);
updateProgressBar(100); // Reset progress bar
updateProgressBar(100);
};
reader.readAsText(file);
});
}



// Hook each toolbar button up to its click handler.
[
  ['processButton', processFiles],
  ['resetButton', resetApplication],
].forEach(([buttonId, onClick]) => {
  document.getElementById(buttonId).addEventListener('click', onClick);
});

0 comments on commit 4ad3e43

Please sign in to comment.