-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add PDT version and one router for all versions
- Loading branch information
Showing
12 changed files
with
542 additions
and
230 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
import { books } from "$/scraping/index.ts"; | ||
import * as cherio from "https://esm.sh/cheerio"; | ||
import { log, loggers } from "$/scraping/logger.ts"; | ||
|
||
// Root of the bibliatodo.com Bible reader; every chapter URL is built beneath it.
const uri = "https://www.bibliatodo.com/la-biblia";

// Version identifiers in the form used as the URL path segment after `uri`.
const versions = [
  "Palabra-de-Dios-para-todos",
  "Reina-valera-1995",
  "Dios-habla-hoy",
];

/**
 * Lists the page URL of every chapter of a book, in chapter order.
 *
 * @param book Book slug as it appears in the URL (e.g. `genesis`).
 * @param chapters Number of chapters; URLs are produced for 1..chapters.
 * @param version Version path segment placed between `uri` and the book slug.
 * @returns One URL per chapter, shaped `<uri>/<version>/<book>-<chapter>`.
 */
const getUrls = (book: string, chapters: number, version: string) =>
  Array.from(
    { length: chapters },
    (_, offset) => `${uri}/${version}/${book}-${offset + 1}`,
  );
|
||
/**
 * Maps a bibliatodo.com version identifier to the short folder name used for
 * the scraped output.
 *
 * @param version Version identifier, e.g. `"Dios-habla-hoy"`.
 * @returns `"pdt"`, `"rv1995"` or `"dhh"`.
 * @throws Error when the version is not one of the three known identifiers.
 *   Failing here keeps callers from building paths such as `./undefined`.
 */
function getFolderName(version: string): string {
  switch (version) {
    case "Palabra-de-Dios-para-todos":
      return "pdt";
    case "Reina-valera-1995":
      return "rv1995";
    case "Dios-habla-hoy":
      return "dhh";
    default:
      throw new Error(`Unknown Bible version: ${version}`);
  }
}
|
||
|
||
/**
 * Scrapes every book of one Bible version and writes one JSON file per book
 * to `./<folder>/<old|new>/<book name lowercased>.json`, where `<folder>` is
 * the value returned by `getFolderName(version)`.
 *
 * Each file holds `{ name, testament, chapters: [{ chapter, verses }] }`.
 * All chapters of a book are fetched concurrently; books are processed one
 * after another.
 *
 * @param version Version identifier used in the site URL.
 * @throws Error when the version has no folder name; fetch and file-system
 *   errors propagate unchanged.
 */
async function fillVersion(version: string): Promise<void> {
  const versionName = getFolderName(version);
  if (!versionName) {
    throw new Error(`Unknown Bible version: ${version}`);
  }

  // `recursive` also creates the version folder itself and does not throw
  // when the directories already exist, so the scraper can be rerun.
  Deno.mkdirSync(`./${versionName}/old`, { recursive: true });
  Deno.mkdirSync(`./${versionName}/new`, { recursive: true });

  for (const b of books) {
    const testament = b.testament === "Nuevo Testamento" ? "new" : "old";

    // URL slug of the book: names containing "-" keep the part before the
    // dash as-is and append the lowercased part after it (the dash is dropped).
    let coded: string;
    if (b.name.includes("-")) {
      const [entry, name] = b.name.split("-");
      coded = `${entry}${name.toLowerCase()}`;
    } else {
      coded = b.name.toLowerCase();
    }

    const urls = getUrls(coded, b.chapters, version);
    const responses = await Promise.all(urls.map((url) => fetch(url)));

    // Responses are in URL order, so position + 1 is the chapter number.
    const cs = [];
    for (const [position, resp] of responses.entries()) {
      cs.push({
        chapter: position + 1,
        verses: await getChapter(resp),
      });
    }

    const data = {
      name: b.name,
      testament,
      chapters: cs,
    };
    const json = JSON.stringify(data, null, "\t");

    // Await the write so failures surface here and the log line below only
    // appears once the file is actually on disk.
    await Deno.writeTextFile(
      `./${versionName}/${testament}/${b.name.toLowerCase()}.json`,
      json,
    );
    log(b.name, "info");
  }
}
|
||
|
||
/**
 * Blanks out the numeric prefix of a verse string (normally the verse number)
 * and trims the result.
 *
 * Scanning from the start, every character that `Number()` converts to a
 * number is replaced by a space. That covers the digits 0-9 and also
 * whitespace, because `Number(" ")` is 0. The scan stops at the first other
 * character found past index 4. Other characters within the first five
 * positions do not stop the scan, so a digit that follows a short word at the
 * very start of the text is blanked as well.
 *
 * @param text Raw text of a verse element.
 * @returns The text with the scanned digits replaced by spaces, trimmed.
 */
function parse(text: string): string {
  const chars = text.split("");

  for (let idx = 0; idx < chars.length; idx++) {
    if (!Number.isNaN(Number(chars[idx]))) {
      // NOTE(review): the replacement is copied as an ordinary space; the file
      // is flagged as containing hidden Unicode, so confirm this character.
      chars[idx] = " ";
    } else if (idx > 4) {
      break;
    }
  }

  return chars.join("").trim();
}
|
||
/**
 * Extracts the verses of one chapter page.
 *
 * Walks the direct children of `#info_capitulo` in document order. `<p>`
 * elements are treated as verses and `<h2>`/`<span>` elements as study
 * headings; a heading is attached as `study` to the verse that follows it.
 * Every other tag is ignored, except that it affects how the next `<p>` is
 * handled (see the comments in the loop).
 *
 * @param resp Fetched chapter page; its body is read as text.
 * @returns Verse objects in page order. `number` is a running 1-based
 *   counter, `study` is present only for verses introduced by a heading.
 */
async function getChapter(resp: Response) {
  const html = await resp.text();
  const $ = cherio.load(html);
  const info = $("#info_capitulo").children();

  // Both predicates accept `undefined` so that looking one element past either
  // end of the list is safe.
  type Tagged = { tagName?: string } | undefined;
  const isVerse = (el: Tagged): boolean => el?.tagName === "p";
  const isStudy = (el: Tagged): boolean =>
    el?.tagName === "h2" || el?.tagName === "span";

  // NOTE(review): the character removed by replaceAll is copied verbatim from
  // the original. The file is flagged as containing hidden Unicode, so confirm
  // it is the intended invisible character and not an ordinary space.
  const verseText = (raw: string): string => parse(raw).replaceAll(' ', '');

  const verses: { verse: string; number: number; study?: string }[] = [];
  let i = 0; // verses counted so far; the next one is numbered i + 1

  for (let j = 0; j < info.length; j++) {
    const c = info[j];
    const next = info[j + 1];
    const prev = info[j - 1];

    if (isStudy(c)) {
      if (isVerse(next)) {
        // Heading directly followed by its verse.
        verses.push({
          study: $(c).text(),
          verse: verseText($(next).text()),
          number: i + 1,
        });
        i++;
      } else if (next?.tagName === "span") {
        // Heading, then a span, then the verse.
        // NOTE(review): the span is itself a study element, so on the next
        // iteration it emits the same verse a second time — confirm intended.
        const realN = info[j + 2];
        if (isVerse(realN)) {
          verses.push({
            study: $(c).text(),
            verse: verseText($(realN).text()),
            number: i + 1,
          });
          i++;
        }
      }
      // A heading that is the last child, or is followed by anything else,
      // produces nothing.
    } else if (isVerse(c)) {
      // A verse right after a heading was already emitted by the heading
      // branch, so it is only handled here when nothing was emitted yet.
      if ((prev !== undefined && !isStudy(prev)) || i === 0) {
        if (prev !== undefined && prev.tagName !== "p") {
          // Preceded by a non-verse element: the counter advances but the
          // text is not stored.
          i++;
        } else {
          verses.push({
            verse: verseText($(c).text()),
            number: i + 1,
          });
          i++;
        }
      }
    }
  }

  return verses;
}
|
||
// Script entry point: running this module scrapes the "Dios-habla-hoy" version. | ||
await fillVersion("Dios-habla-hoy") | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
import { connect } from "./src/database/index.ts"; | ||
import { books } from "$/scraping/index.ts"; | ||
import { DataBook } from "$/scraping/scrape.ts"; | ||
import { Client } from "https://deno.land/x/[email protected]/mod.ts"; | ||
import "https://deno.land/x/[email protected]/load.ts"; | ||
import { create, insertMultiple, search } from "npm:@orama/orama"; | ||
|
||
const sql = connect(); | ||
|
||
async function createMemoryDB() { | ||
|
@@ -22,6 +24,7 @@ async function createMemoryDB() { | |
return db; | ||
} | ||
|
||
|
||
async function fillMemoryDB(db: any) { | ||
const r = | ||
await sql`Select verses_nvi.id as id, verses_nvi.number as number, verse, study, name, chapter, chapter_id from verses_nvi join chapters on verses_nvi.chapter_id = chapters.id join books on books.id = chapters.book_id`; | ||
|
@@ -73,8 +76,8 @@ const findVerses = async (db: any, term: string) => { | |
}); | ||
}; | ||
|
||
const nvi = await createMemoryDB(); | ||
await fillMemoryDB(nvi); | ||
//const nvi = await createMemoryDB(); | ||
//await fillMemoryDB(nvi); | ||
|
||
// const sql = connect(); | ||
|
||
|
@@ -146,63 +149,75 @@ await fillMemoryDB(nvi); | |
|
||
// const r = await sql`select * from verses_nvi WHERE UNACCENT(LOWER(verse)) LIKE '%josue%' | ||
// // LIMIT 10;` | ||
// console.log(r) | ||
|
||
// ---------------------------------------------------------------------------
// One-off loader: drops and recreates the verse table of one Bible version,
// then fills it from the scraped JSON files in ./<version>/{old,new}/.
// ---------------------------------------------------------------------------

const client = new Client(Deno.env.get("DATABASE_URL") ?? "");

await client.connect();

const table = `verses_dhh`;
const version = 'dhh';

// Rows per INSERT statement. Each row binds five parameters, so this stays
// well under the 65535 bind parameters a single statement may carry.
const INSERT_BATCH_SIZE = 5000;

/**
 * Reads one scraped book file and builds a table row for each of its verses.
 *
 * @param bookName Book name as stored in `books.name`; lowercased for the file name.
 * @param testamentDir Sub-folder of `./<version>` that holds the file.
 * @returns Rows in file order, ready to insert.
 * @throws Error when the database has no chapter row for a chapter in the file.
 */
async function loadBookRows(bookName: string, testamentDir: "old" | "new") {
  const raw = await Deno.readTextFile(
    `./${version}/${testamentDir}/${bookName.toLowerCase()}.json`,
  );
  // NOTE(review): the scraper shown in this commit writes each chapter's
  // verses under the key `verses`, while this loader reads `vers` as declared
  // by DataBook — confirm the files on disk match.
  const info: DataBook = JSON.parse(raw);

  // Chapter rows are matched to chapter numbers by position, so the order
  // must be deterministic.
  // NOTE(review): assumes chapter ids ascend in chapter order — confirm.
  const chapterRows = await sql`select chapters.id from chapters JOIN books ON chapters.book_id = books.id WHERE books.name = ${bookName} ORDER BY chapters.id`;

  return info.chapters.flatMap((c) => {
    const chapterNumber = Number(c.chapter);
    const chapterRow = chapterRows[chapterNumber - 1];
    if (chapterRow === undefined) {
      throw new Error(`No chapters row found for ${bookName} chapter ${c.chapter}`);
    }
    return c.vers.map((v) => ({
      verse: v.verse,
      // The query helper rejects `undefined`; a missing study is stored as NULL.
      study: v.study ?? null,
      number: v.number,
      chapter: chapterNumber,
      chapter_id: chapterRow.id,
    }));
  });
}

try {
  await client.queryArray(`DROP TABLE if exists ${table}`);

  await sql`
    create table ${sql(table)} (
      id serial primary key,
      verse text not null,
      study text,
      number integer not null,
      chapter integer not null,
      chapter_id integer not null,
      foreign key (chapter_id) references chapters(id)
    )`;

  // Old Testament books first, then New Testament, each in `books` order.
  const testaments = [
    ["Antiguo Testamento", "old"],
    ["Nuevo Testamento", "new"],
  ] as const;

  const data: Awaited<ReturnType<typeof loadBookRows>> = [];
  for (const [testament, dir] of testaments) {
    for (const b of books.filter((book) => book.testament === testament)) {
      data.push(...(await loadBookRows(b.name, dir)));
      console.log(b.name);
    }
  }

  // Values are bound as parameters instead of being spliced into the SQL
  // text, so quotes in verse or study text can neither break nor alter the
  // statement. One transaction keeps the load all-or-nothing.
  await sql.begin(async (tx) => {
    for (let start = 0; start < data.length; start += INSERT_BATCH_SIZE) {
      const batch = data.slice(start, start + INSERT_BATCH_SIZE);
      await tx`insert into ${tx(table)} ${tx(batch, "verse", "study", "number", "chapter_id", "chapter")}`;
    }
  });
  console.log(`Inserted ${data.length} rows into ${table}`);
} finally {
  // Release both connections even when the load fails.
  await client.end();
  await sql.close();
}
|
Oops, something went wrong.