Skip to content

Commit

Permalink
Merge pull request #34 from migrants-and-the-state/feature/init-searc…
Browse files Browse the repository at this point in the history
…h-14

Feature/init search 14
  • Loading branch information
mnyrop authored Jan 31, 2025
2 parents 3d922c8 + 6cc628e commit 404e824
Show file tree
Hide file tree
Showing 11 changed files with 2,851 additions and 3,455 deletions.
540 changes: 461 additions & 79 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"vite": "^6.0.0"
},
"dependencies": {
"flexsearch": "^0.7.43",
"minisearch": "^7.1.1",
"mirador": "^3.3.0",
"mirador-image-tools": "^0.11.0",
"vite-plugin-commonjs": "^0.10.4"
Expand Down
11 changes: 2 additions & 9 deletions src/lib/components/search/AFile.svelte
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<script>
import { base } from '$app/paths';
import { handleSubmit } from '$lib/search';
import { fieldsToSearchWithin } from '$lib/fields';
import ButtonControls from './forms/ButtonControls.svelte';
import { FolderShared as AFileIcon } from 'carbon-icons-svelte';
Expand Down Expand Up @@ -44,15 +45,7 @@
bind:selectedIds={selectedFields}
size="lg"
sortItem={() => {}}
items={[
{ id: 'A-Number', text: 'A-Number (NARA)' },
{ id: 'CoB', text: 'Country of birth (NARA)' },
{ id: 'Last Name', text: 'Last name (NARA)' },
{ id: 'First Name', text: 'First name (NARA)' },
{ id: 'PoE', text: 'Port of entry (NARA)' },
{ id: 'Doc Types', text: 'Document Types (CNN)' },
{ id: 'Form Titles', text: 'Form Titles (LLM)' }
]}
items={fieldsToSearchWithin['afile']}
/>
</div>
<div class="basis-2/3">
Expand Down
8 changes: 2 additions & 6 deletions src/lib/components/search/Page.svelte
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<script>
import { base } from '$app/paths';
import { handleSubmit } from '$lib/search';
import { fieldsToSearchWithin } from '$lib/fields';
import ButtonControls from './forms/ButtonControls.svelte';
import { Document as PageIcon } from 'carbon-icons-svelte';
Expand Down Expand Up @@ -43,12 +44,7 @@
bind:selectedIds={selectedFields}
size="lg"
sortItem={() => {}}
items={[
{ id: 'Page Text', text: 'Page Text (OCR)' },
{ id: 'Countries', text: 'Countries (NLP)' },
{ id: 'Form Title', text: 'Form Title (LLM)' },
{ id: 'Years', text: 'Years (NLP)' }
]}
items={fieldsToSearchWithin['page']}
/>
</div>
<div class="basis-2/3">
Expand Down
121 changes: 121 additions & 0 deletions src/lib/fields.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/**
 * Display label and lookup path for every A-File field known to the UI.
 *
 * Keys are the field ids; `text` is the human-readable label and `keypath`
 * is the dot-separated path to the value inside an A-File record.
 * The parenthesised suffix of each label mirrors the source segment of the
 * keypath (`nara` -> "(NARA)", `..._llm_v1` -> "(LLM)", `..._nlp_v1` -> "(NLP)").
 */
const afileFieldMap = {
  id: {
    text: 'A-Number',
    keypath: 'id'
  },
  page_count: {
    text: 'Page Count',
    keypath: 'page_count'
  },
  last_name_nara: {
    text: 'Last Name (NARA)',
    keypath: 'fields.last_name.nara'
  },
  first_name_nara: {
    text: 'First Name (NARA)',
    keypath: 'fields.first_name.nara'
  },
  dob_nara: {
    text: 'Date of Birth (NARA)',
    keypath: 'fields.dob.nara'
  },
  doe_nara: {
    text: 'Date of Entry (NARA)',
    keypath: 'fields.doe.nara'
  },
  poe_nara: {
    text: 'Port of Entry (NARA)',
    keypath: 'fields.poe.nara'
  },
  cob_nara: {
    text: 'Country of Birth (NARA)',
    keypath: 'fields.cob.nara'
  },
  sex_nara: {
    text: 'Sex (NARA)',
    keypath: 'fields.sex.nara'
  },
  sex_ms: {
    text: 'Sex (LLM)',
    keypath: 'fields.sex.ms_sex_llm_v1'
  },
  form_titles_llm: {
    text: 'Form Titles (LLM)',
    keypath: 'fields.form_titles.ms_form_titles_llm_v1'
  },
  countries_nlp: {
    // Label follows the keypath's `nlp` source, matching the page-level field.
    text: 'Countries (NLP)',
    keypath: 'fields.countries.ms_countries_nlp_v1'
  }
};

/**
 * Display label and lookup path for every page-level field known to the UI.
 *
 * Built from `[fieldId, label, keypath]` rows; the resulting object maps each
 * field id to `{ text, keypath }`, where `keypath` is the dot-separated path
 * to the value inside a page record.
 */
const pageFieldMap = Object.fromEntries(
  [
    ['id', 'Page ID', 'id'],
    ['anumber', 'A-Number', 'anumber'],
    ['full_text', 'Full Text (OCR)', 'full_text'],
    ['sex_ms', 'Sex (LLM)', 'fields.sex.ms_sex_llm_v1'],
    // NOTE(review): a sample record in static/api/index/g325a.json stores the
    // doctype under `ms_doctype_v1` (no `llm`) — confirm this keypath against
    // the page index before relying on it.
    ['doctype_ms', 'Document Type (LLM)', 'fields.doctype.ms_doctype_llm_v1'],
    ['form_title_llm', 'Form Title (LLM)', 'fields.form_title.ms_form_title_llm_v1'],
    ['countries_nlp', 'Countries (NLP)', 'fields.countries.ms_countries_nlp_v1'],
    ['years_nlp', 'Years (NLP)', 'fields.years.ms_years_nlp_v1']
  ].map(([fieldId, text, keypath]) => [fieldId, { text, keypath }])
);

/**
 * Field definitions grouped by search scope.
 * `fieldMap[scope][fieldId]` yields that field's `{ text, keypath }` entry;
 * the search module uses it to translate selected field ids into keypaths.
 */
export const fieldMap = {
afile: afileFieldMap,
page: pageFieldMap
};

/**
 * Page fields offered in the "search within" picker, in display order.
 * Each option carries the field id plus the label and keypath copied from
 * `pageFieldMap`.
 */
const pageFieldsToSearchWithin = [
  'anumber',
  'form_title_llm',
  'countries_nlp',
  'years_nlp',
  'full_text'
].map((id) => {
  const { text, keypath } = pageFieldMap[id];
  return { id, text, keypath };
});

/**
 * A-File fields offered in the "search within" picker, in display order.
 * Each option carries the field id plus the label and keypath copied from
 * `afileFieldMap`.
 */
const afileFieldsToSearchWithin = [
  'id',
  'form_titles_llm',
  'countries_nlp',
  'last_name_nara',
  'first_name_nara',
  'poe_nara',
  'cob_nara'
].map((id) => {
  const { text, keypath } = afileFieldMap[id];
  return { id, text, keypath };
});

/**
 * Searchable-field option lists keyed by scope.
 * The AFile and Page search components pass `fieldsToSearchWithin['afile']`
 * and `fieldsToSearchWithin['page']` as the `items` of their field pickers.
 */
export const fieldsToSearchWithin = {
afile: afileFieldsToSearchWithin,
page: pageFieldsToSearchWithin
};
2 changes: 1 addition & 1 deletion src/lib/scope.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ export function getScopeIndex(scope) {

/**
 * Switches the active scope by writing it into the `tab` query parameter.
 *
 * @param {number} index - Position of the desired scope within `validScopes`.
 */
export function updateScopeIndex(index) {
  const newScope = validScopes[index];
  // Navigate exactly once: `replaceState` and `invalidateAll` are passed
  // through to SvelteKit's `goto` unchanged.
  goto(`?tab=${newScope}`, { replaceState: true, invalidateAll: true });
}
64 changes: 46 additions & 18 deletions src/lib/search.js
Original file line number Diff line number Diff line change
@@ -1,32 +1,60 @@
import { goto } from '$app/navigation';
import { base } from '$app/paths';
import { fieldMap } from '$lib/fields';
import MiniSearch from 'minisearch';

const FlexSearch = require('flexsearch');

const index = FlexSearch.Index({});
/**
 * Builds the MiniSearch options used for the page index.
 *
 * @param {string[]} selectedFields - Dot-separated keypaths to index for full-text search.
 * @returns {{fields: string[], extractField: Function, storeFields: string[]}}
 *   Options object handed to the MiniSearch constructor.
 */
function pMiniConfig(selectedFields) {
  // Follows a dot-separated path through nested objects/arrays. Numeric
  // segments are used as numbers; the walk stops at the first falsy value
  // and returns that value as-is.
  const resolvePath = (record, path) => {
    let node = record;
    for (const segment of path.split('.')) {
      if (!node) {
        break;
      }
      const asIndex = Number.parseInt(segment, 10);
      node = node[Number.isNaN(asIndex) ? segment : asIndex];
    }
    return node;
  };

  return {
    fields: selectedFields,
    extractField: resolvePath,
    // Fields returned with each search result.
    storeFields: ['id', 'anumber', 'page_index', 'full_text', 'fields']
  };
}

export function addDocument(id, content) {
index.add(id, content);
/**
 * Builds the MiniSearch options used for the A-File index.
 *
 * @param {string[]} selectedFields - Dot-separated keypaths to index for full-text search.
 * @returns {{fields: string[], extractField: Function, storeFields: string[]}}
 *   Options object handed to the MiniSearch constructor.
 */
function aMiniConfig(selectedFields) {
  // Follows a dot-separated path through nested objects/arrays. Numeric
  // segments are used as numbers; the walk stops at the first falsy value
  // and returns that value as-is.
  const resolvePath = (record, path) => {
    let node = record;
    for (const segment of path.split('.')) {
      if (!node) {
        break;
      }
      const asIndex = Number.parseInt(segment, 10);
      node = node[Number.isNaN(asIndex) ? segment : asIndex];
    }
    return node;
  };

  return {
    fields: selectedFields,
    extractField: resolvePath,
    // Fields returned with each search result.
    storeFields: ['id', 'fields']
  };
}

export function search(query) {
return index.search(query);
/**
 * Loads the JSON index for `scope`, indexes it with MiniSearch and runs the
 * query described by `searchParams`.
 *
 * @param {string} scope - Search scope; `'afile'` uses the A-File config, any
 *   other value uses the page config.
 * @param {URLSearchParams} searchParams - Reads `query` (search text) and every
 *   `selectedFields` value (field ids to search within).
 * @returns {Promise<Array>} Search results, or an empty array when the index
 *   could not be loaded or searched (the error is logged).
 */
export async function search(scope, searchParams) {
  const jsonPath = `${base}/api/index/${scope}.json`;
  const scopeFields = fieldMap[scope] ?? {};
  // Field ids come from the URL, so ignore any the scope does not define
  // instead of throwing on an undefined lookup.
  const selectedFields = searchParams
    .getAll('selectedFields')
    .filter((field) => Object.prototype.hasOwnProperty.call(scopeFields, field))
    .map((field) => scopeFields[field].keypath);
  const miniSearch = new MiniSearch(
    scope === 'afile' ? aMiniConfig(selectedFields) : pMiniConfig(selectedFields)
  );
  // With no query text, fall back to MiniSearch's wildcard query.
  const query = searchParams.get('query') || MiniSearch.wildcard;

  try {
    const resp = await fetch(jsonPath);
    if (!resp.ok) {
      throw new Error(`Failed to load search index ${jsonPath} (HTTP ${resp.status})`);
    }
    miniSearch.addAll(await resp.json());
    return miniSearch.search(query, { prefix: true, combine: 'AND', fuzzy: 0.1 });
  } catch (err) {
    // Best effort: report the failure but hand callers an empty result list
    // rather than `undefined`.
    console.error(err);
    return [];
  }
}

/**
 * Submit handler for the search forms: remembers the current page as the
 * form referrer and navigates to the results route for the form's scope,
 * carrying the form values in the query string.
 *
 * @param {SubmitEvent} event - Submit event whose target is a form element
 *   carrying a `scope` attribute.
 */
export function handleSubmit(event) {
  event.preventDefault();
  localStorage.setItem('formReferrer', window.location.href);
  const data = new FormData(event.target);
  const scope = event.target.attributes['scope'].value;
  // Build the query straight from the form entries so repeated keys (e.g.
  // several `selectedFields`) stay separate parameters, then navigate once.
  const params = new URLSearchParams(data);
  goto(`${base}/results/${scope}?${params.toString()}`);
}
10 changes: 4 additions & 6 deletions src/routes/results/[scope]/+page.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
import { error } from '@sveltejs/kit';
import { validScopes } from '$lib/scope';
import { base } from '$app/paths';
import { search } from '$lib/search';

export async function load({ url, params }) {
const vUrl = new URL(url.href);
const searchParams = new URLSearchParams(vUrl.search);
const scope = params.scope;

if (validScopes.includes(scope)) {
const jsonPath = `${base}/api/index/${scope}.json`;
const resp = await fetch(jsonPath);
const results = (await resp.json()) || [];
const results = await search(scope, searchParams);
console.log('results', results);
return {
url: url.href,
scope: scope,
results: results,
searchParams: searchParams
results: results
};
} else {
error(404, 'Not Found');
Expand Down
20 changes: 1 addition & 19 deletions src/routes/results/[scope]/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,9 @@
import { ClickableTile, Pagination } from 'carbon-components-svelte';
export let data;
console.log(data.searchParams);
console.log(data.results);
const results = data.results;
localStorage.setItem('resultReferrer', data.url);
// import { addDocument, search } from '$lib/search';
// Sample documents
// const documents = [
// { id: 1, content: 'First document content' },
// { id: 2, content: 'Second document content' }
// ];
// onMount(() => {
// documents.forEach((doc) => addDocument(doc.id, doc.content));
// });
// function handleSearch() {
// results = search(query, { suggest: true });
// }
const safeDetail = (result, label, key, method) => {
if (result?.fields?.[key]?.[method]) {
return `${label}: ${result.fields[key][method]}; `;
Expand Down Expand Up @@ -93,7 +75,7 @@
<a href={localStorage.getItem('formReferrer')}>Back to search</a>
{/if}
<h1 class="py-4">Search Results</h1>
<h1 class="py-4">Search Results ({totalItems})</h1>
<Pagination
class="mb-6"
Expand Down
37 changes: 0 additions & 37 deletions static/api/index/g325a.json
Original file line number Diff line number Diff line change
@@ -1,41 +1,4 @@
[
{
"id": "A10712436_0021",
"page_index": 21,
"resources": {
"full_jpg": "https://dctn4zjpwgdwdiiy5odjv7o2se0bqgjb.lambda-url.us-east-1.on.aws/iiif/3/og-2023-kc-nara_A10712436_0021/full/max/0/default.jpg",
"ocr_txt": true
},
"fields": {
"sex": {
"ms_sex_llm_v1": "male"
},
"doctype": {
"ms_doctype_v1": "form"
},
"form_title": {
"ms_form_title_llm_v1": "Title: BIOGRAPHIC INFORMATION\\nForm Number: OMB No. 1115-0066"
},
"is_g325a": true,
"is_cert_naturalization": false,
"countries": {
"ms_countries_nlp_v1": ["United States"]
},
"g325a": {
"occupation": {
"occupation_llm_v1": "['n.a']"
},
"reason": {
"reason_llm_v1": "NATURALIZATION"
},
"nationality": {
"nationality_llm_v1": "Salvadoran"
}
}
},
"anumber": "A10712436",
"full_text": "U.S. Department of Justice OMB No. 1115-0066 Immigration and Naturalization Service BIOGRAPHIC INFORMATION (Family name) (First name) (Middle name) MALE BIRTHDATE(Mo.-Day-Yr.) NATIONALITY FILE NUMBER Renderos Rosa Aminta FEMALE 2-28-13 SALVADOREAN 10 712 436 ALL OTHER NAMES USED (Including names by previous marriages) CITY AND COUNTRY OF BIRTH SOCIAL SECURITY NO. SAME NAHUIZALED EL SALVADOR C.A. 566-54-1023 (If. any) FAMILY NAME FIRST NAME DATE, CITY AND COUNTRY OF BIRTH(If known) CITY AND COUNTRY OF RESIDENCE. DominGuez RomuLo sonsonate, EL S a LV2DOR C.A. FATHER MOTHER(Maiden name) JuLiA CeLSA Renderos, same HUSBAND (If none, so state) FAMILY NAME FIRST NAME BIRTHDATE CITY & COUNTRY OF BIRTH DATE OF MARRIAGE PLACE OF MARRIAGE OR (For wife. give maiden name) WIFE none FORMER HUSBANDS OR WIVES (if none,so state) FAMILY NAME (For wife, give maiden name) FIRST NAME BIRTHDATE DATE & PLACE OF MARRIAGE DATE AND PLACE OF TERMINATION OF MARRIAGE APPLICANT'S RESIDENCE LAST FIVE YEARS. LIST PRESENT ADDRESS FIRST. FROM TO STREET AND NUMBER CITY PROVINCE OR STATE COUNTRY MONTH YEAR MONTH YEAR 11536 E. ImpERIaL Norwark CA. 12 79 PRESENT TIME APPLICANT'S LAST ADDRESS OUTSIDE THE UNITED STATES OF MORE THAN ONE YEAR FROM TO STREET AND NUMBER CITY PROVINCE OR STATE COUNTRY MONTH YEAR MONTH YEAR 13 C. OTe. 2-2 sonsonate EL SaLVaDor C 11 77 12 79 APPLICANT'S EMPLOYMENT LAST FIVE YEARS. (IF NONE, so STATE) LIST PRESENT EMPLOYMENT FIRST FROM TO FULL NAME AND ADDRESS OF EMPLOYER OCCUPATION (SPECIFY) MONTH YEAR MONTH YEAR none PRESENT TIME Show below last occupation abroad if not shown above. (Include all information requested above.) THIS FORM IS SUBMITTED IN CONNECTION WITH APPLICATION FOR: SIGNATURE OF APPLICANT DATE NATURALIZATION OTHER (SPECIFY): STATUS AS PERMANENT RESIDENT Rosal Aminta Renderos 4-18-88 IF YOUR NATIVE ALPHABET IS IN OTHER THAN ROMAN LETTERS. WRITE YOUR NAME IN YOUR NATIVE ALFNABET IN THIS SPACE: Are all copies legible? 
Yes PENALTIES: SEVERE PENALTIES ARE PROVIDED BY LAW FOR KNOWINGLY AND WILLFULLY FALSIFYING OR CONCEALING A MATERIAL FACT. APPLICANT: BE SURE TO PUT YOUR NAME AND ALIEN REGISTRATION NUMBER IN THE BOX OUTLINED BY HEAVY BORDER BELOW. COMPLETE THIS BOX (Family name) (Given name) (Middle name) (Alien registration number) Form G-325 (Rev. 10-1-82) Y (1) Ident."
},
{
"id": "A10712436_0021",
"page_index": 21,
Expand Down
Loading

0 comments on commit 404e824

Please sign in to comment.