Skip to content

Commit

Permalink
feat: add PDT version and one router for all versions
Browse files Browse the repository at this point in the history
  • Loading branch information
Atticus64 committed Dec 21, 2023
1 parent 303f4e5 commit 0f81aa3
Show file tree
Hide file tree
Showing 12 changed files with 542 additions and 230 deletions.
177 changes: 177 additions & 0 deletions scrapy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import { books } from "$/scraping/index.ts";
import * as cherio from "https://esm.sh/cheerio";
import { log, loggers } from "$/scraping/logger.ts";

// Base address that getUrls prefixes to every chapter page URL.
const uri = "https://www.bibliatodo.com/la-biblia";

// Version slugs as they appear in the site's URLs; these are the same
// strings that getFolderName maps to local folder names.
const versions = [
  "Palabra-de-Dios-para-todos",
  "Reina-valera-1995",
  "Dios-habla-hoy",
];

/**
 * Builds the list of chapter page URLs for one book of one version.
 *
 * @param book URL-ready book slug (already lower-cased by the caller).
 * @param chapters Number of chapters; one URL is produced for 1..chapters.
 * @param version Version slug placed between the base URL and the book.
 * @returns URLs shaped like `<uri>/<version>/<book>-<chapter>`, in chapter order.
 */
const getUrls = (book: string, chapters: number, version: string) =>
  Array.from(
    { length: chapters },
    (_, index) => `${uri}/${version}/${book}-${index + 1}`,
  );

// Site version slug -> short local folder name.
const VERSION_FOLDERS = new Map<string, string>([
  ["Palabra-de-Dios-para-todos", "pdt"],
  ["Reina-valera-1995", "rv1995"],
  ["Dios-habla-hoy", "dhh"],
]);

/**
 * Maps a version slug to the short folder name used for its scraped output.
 *
 * @param version Version slug as used in the site's URLs.
 * @returns The folder name ("pdt", "rv1995" or "dhh").
 * @throws Error when the slug is not one of the known versions. Previously the
 *   function fell through and returned `undefined`, which callers interpolated
 *   into paths such as `./undefined/old`.
 */
function getFolderName(version: string): string {
  const folder = VERSION_FOLDERS.get(version);
  if (folder === undefined) {
    throw new Error(`Unknown Bible version: "${version}"`);
  }
  return folder;
}


/**
 * Scrapes every book of one Bible version and writes one JSON file per book
 * to `./<folder>/old/` or `./<folder>/new/`, where `<folder>` comes from
 * getFolderName.
 *
 * Each file holds `{ name, testament, chapters }`, where `chapters` is a list
 * of `{ chapter, verses }` and `verses` is whatever getChapter returns for
 * that chapter's page.
 *
 * @param version Version slug as used in the site's URLs.
 */
async function fillVersion(version: string) {
  const versionName = getFolderName(version);

  // `recursive: true` creates the parent folder as needed and does not throw
  // when the directories already exist, so the script can be re-run.
  for (const testamentDir of ["old", "new"]) {
    Deno.mkdirSync(`./${versionName}/${testamentDir}`, { recursive: true });
  }

  for (const b of books) {
    const testament = b.testament === "Nuevo Testamento" ? "new" : "old";

    // Names containing "-" drop the dash for the URL slug; only the part
    // after the dash is lower-cased.
    let coded: string;
    if (b.name.includes("-")) {
      const [entry, name] = b.name.split("-");
      coded = `${entry}${name.toLowerCase()}`;
    } else {
      coded = b.name.toLowerCase();
    }

    // All chapter pages of a book are requested concurrently.
    const urls = getUrls(coded, b.chapters, version);
    const responses = await Promise.all(urls.map((url) => fetch(url)));

    // Responses come back in URL order, so position + 1 is the chapter number.
    const chapters = [];
    for (const [index, resp] of responses.entries()) {
      chapters.push({
        chapter: index + 1,
        verses: await getChapter(resp),
      });
    }

    const json = JSON.stringify(
      { name: b.name, testament, chapters },
      null,
      "\t",
    );

    // Await the write so failures surface here and the log line below is only
    // emitted once the file is actually on disk.
    await Deno.writeTextFile(
      `./${versionName}/${testament}/${b.name.toLowerCase()}.json`,
      json,
    );
    log(b.name, "info");
  }
}


/**
 * Blanks out the digits found at the start of a scraped verse string and
 * trims the result.
 *
 * The characters of the original string are scanned in order. Any character
 * that converts to a number has its first occurrence in the working text
 * replaced by a space. Scanning stops at the first non-numeric character
 * located past index 4.
 *
 * @param text Raw text of a verse element.
 * @returns The text with those digits blanked out, trimmed on both ends.
 */
function parse(text: string) {
  const symbols = text.split('');

  for (const [position, symbol] of symbols.entries()) {
    const looksNumeric = !Number.isNaN(Number(symbol));

    if (looksNumeric) {
      // Whitespace also converts to 0, so spaces take this branch as well.
      text = text.replace(symbol, ' ');
      continue;
    }

    const spaceBeforeText = symbol === ' ' &&
      Number.isNaN(Number(symbols[position + 1]));
    if (spaceBeforeText || position > 4) {
      break;
    }
  }

  return text.trim();
}

/**
 * Extracts the verses of one chapter page.
 *
 * The direct children of `#info_capitulo` are walked in document order.
 * `<p>` elements are treated as verses and `<h2>` / `<span>` elements as
 * study headings that attach to the verse following them. Other tags are
 * ignored.
 *
 * @param resp Fetch response whose body is the chapter's HTML.
 * @returns Entries of the form `{ verse, number }`, plus `study` when a
 *   heading precedes the verse. `number` is a running 1-based counter.
 */
async function getChapter(resp: Response) {
  const html = await resp.text();
  const $ = cherio.load(html);
  const info = $("#info_capitulo").children();

  type Tagged = { tagName: string };
  const isVerse = (node: Tagged) => node.tagName === "p";
  const isStudy = (node: Tagged) =>
    node.tagName === "h2" || node.tagName === "span";

  let i = 0; // number of verse slots consumed so far
  let j = 0; // index of the current child inside `info`
  const verses = [];

  for (const c of info) {
    const next = info[j + 1];
    const prev = info[j - 1];
    // NOTE(review): this flag is re-created on every iteration and is only set
    // in the heading branch, so the check in the verse branch below never
    // sees it as true. Behaviour is kept as-is; confirm the original intent.
    let insert = false;

    if (isStudy(c)) {
      // The guard is parenthesised so that a heading that is the last child
      // (next === undefined) is skipped instead of throwing on `next.tagName`.
      if (next !== undefined && (isVerse(next) || next.tagName === "span")) {
        if (isStudy(next)) {
          // Heading followed by a <span>: the verse is expected one further on.
          const realN = info[j + 2];
          if (realN !== undefined && isVerse(realN)) {
            // NOTE(review): as written this removes every occurrence of the
            // first argument from the verse; confirm it is the intended
            // character.
            let text = parse($(realN).text());
            text = text.replaceAll(' ', '');
            verses.push({
              study: $(c).text(),
              verse: text,
              number: i + 1,
            });
            insert = true;
            i++;
          }
        } else if (isVerse(next)) {
          let text = parse($(next).text());
          text = text.replaceAll(' ', '');
          verses.push({
            study: $(c).text(),
            verse: text,
            number: i + 1,
          });
          i++;
        }
      }
    } else if (isVerse(c)) {
      // A <p> directly after a heading was already emitted by the heading
      // branch, so it is only considered here when the previous sibling is
      // not a heading (or nothing has been counted yet).
      if ((prev !== undefined && !isStudy(prev)) || i === 0) {
        if (insert || (prev !== undefined && prev.tagName !== "p")) {
          // Previous sibling is some other tag: advance the counter without
          // emitting a verse.
          i++;
          insert = false;
        } else {
          let text = parse($(c).text());
          text = text.replaceAll(' ', '');
          verses.push({
            verse: text,
            number: i + 1,
          });
          i++;
        }
      }
    }

    j++;
  }

  return verses;
}

// Script entry point: scrape the "Dios-habla-hoy" version when this module runs.
await fillVersion("Dios-habla-hoy")


135 changes: 75 additions & 60 deletions script.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { connect } from "./src/database/index.ts";
import { books } from "$/scraping/index.ts";
import { DataBook } from "$/scraping/scrape.ts";
import { Client } from "https://deno.land/x/[email protected]/mod.ts";
import "https://deno.land/x/[email protected]/load.ts";
import { create, insertMultiple, search } from "npm:@orama/orama";

const sql = connect();

async function createMemoryDB() {
Expand All @@ -22,6 +24,7 @@ async function createMemoryDB() {
return db;
}


async function fillMemoryDB(db: any) {
const r =
await sql`Select verses_nvi.id as id, verses_nvi.number as number, verse, study, name, chapter, chapter_id from verses_nvi join chapters on verses_nvi.chapter_id = chapters.id join books on books.id = chapters.book_id`;
Expand Down Expand Up @@ -73,8 +76,8 @@ const findVerses = async (db: any, term: string) => {
});
};

const nvi = await createMemoryDB();
await fillMemoryDB(nvi);
//const nvi = await createMemoryDB();
//await fillMemoryDB(nvi);

// const sql = connect();

Expand Down Expand Up @@ -146,63 +149,75 @@ await fillMemoryDB(nvi);

// const r = await sql`select * from verses_nvi WHERE UNACCENT(LOWER(verse)) LIKE '%josue%'
// // LIMIT 10;`
// console.log(r)

// Short version code; it selects the ./<version>/{old,new} JSON folders read
// further down in this script.
const version = "dhh";
// Table that is dropped here and recreated and filled further down.
const table = "verses_dhh";

// Second connection, used for the raw DROP and INSERT statements.
const client = await new Client(Deno.env.get("DATABASE_URL") ?? "");
await client.connect();

await client.queryArray(`DROP TABLE if exists ${table}`);
// sql.end()
// await client.queryArray(`
// create table verses_dhh (
// id serial primary key,
// verse text not null,
// study text,
// number integer not null,
// chapter integer not null,
// chapter_id integer not null,
// foreign key (chapter_id) references chapters(id)
// )`)
//
// const data = []
// for(const b of books.filter(b => b.testament === 'Antiguo Testamento')) {
// const raw = await Deno.readTextFile(`./db/dhh/oldTestament/${b.name.toLowerCase()}.json`)
// const info: DataBook = await JSON.parse(raw)
//
// const {rows} = await client.queryArray(`select chapters.id from chapters JOIN books ON chapters.book_id = books.id WHERE books.name = '${b.name}'`)
//
// info.chapters.forEach(c => {
// const index = Number(c.chapter)
// c.vers.forEach(v => {
// data.push({
// verse: v.verse,
// study: v.study,
// number: v.number,
// chapter: Number(c.chapter),
// chapter_id: rows[index - 1][0]
// })
// })
// })
// console.log(b.name)
// }
//
// for(const b of books.filter(b => b.testament === 'Nuevo Testamento')) {
// const raw = await Deno.readTextFile(`./db/dhh/newTestament/${b.name.toLowerCase()}.json`)
// const info: DataBook = await JSON.parse(raw)
//
// const {rows} = await client.queryArray(`select chapters.id from chapters JOIN books ON chapters.book_id = books.id WHERE books.name = '${b.name}'`)
// info.chapters.forEach(c => {
// const index = Number(c.chapter)
// c.vers.forEach(v => {
// data.push({
// verse: v.verse,
// study: v.study,
// number: v.number,
// chapter: Number(c.chapter),
// chapter_id: rows[index -1][0]
// })
// })
//
// })
// console.log(b.name)
// }
//
// const r = await client.queryArray(`INSERT INTO verses_dhh (verse, study, number, chapter_id, chapter) VALUES ${data.map(d => `('${d.verse}', ${d.study ? `'${d.study}'`: null}, ${d.number}, ${d.chapter_id}, ${d.chapter})`).join(',')}`)
// console.log(r)
//
// Recreate the verses table for the selected version.
await sql`
create table ${sql(table)} (
id serial primary key,
verse text not null,
study text,
number integer not null,
chapter integer not null,
chapter_id integer not null,
foreign key (chapter_id) references chapters(id)
)`;

// Collect every verse of every book: Old Testament first, then New Testament,
// each in the order given by `books`.
const data = [];
const testamentFolders = [
  ["Antiguo Testamento", "old"],
  ["Nuevo Testamento", "new"],
] as const;

for (const [testament, folder] of testamentFolders) {
  for (const b of books.filter((book) => book.testament === testament)) {
    const raw = await Deno.readTextFile(
      `./${version}/${folder}/${b.name.toLowerCase()}.json`,
    );
    const info: DataBook = JSON.parse(raw);
    const name = `${b.name}`;

    // NOTE(review): there is no ORDER BY, so mapping chapter N to rows[N - 1]
    // relies on the database returning chapters in chapter order; confirm.
    const rows =
      await sql`select chapters.id from chapters JOIN books ON chapters.book_id = books.id WHERE books.name = ${name}`;

    // NOTE(review): scrapy.ts writes each chapter's list under the key
    // `verses`, while this reads `c.vers` as declared by DataBook; confirm
    // the JSON files on disk match.
    for (const c of info.chapters) {
      const chapter = Number(c.chapter);
      for (const v of c.vers) {
        data.push({
          verse: v.verse,
          study: v.study,
          number: v.number,
          chapter,
          chapter_id: rows[chapter - 1].id,
        });
      }
    }
    console.log(b.name);
  }
}

// Insert with bound parameters instead of splicing scraped text into the SQL
// string: an apostrophe in a verse or heading no longer breaks (or alters)
// the statement. Rows go in batches because one statement can carry at most
// 65535 bind parameters.
const COLUMNS_PER_ROW = 5;
const BATCH_SIZE = 1000;
let inserted = 0;

for (let start = 0; start < data.length; start += BATCH_SIZE) {
  const batch = data.slice(start, start + BATCH_SIZE);

  const placeholders = batch
    .map((_, row) => {
      const base = row * COLUMNS_PER_ROW;
      return `($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4}, $${base + 5})`;
    })
    .join(",");

  // A missing or empty study heading is stored as NULL, as before.
  const args = batch.flatMap((d) => [
    d.verse,
    d.study ? d.study : null,
    d.number,
    d.chapter_id,
    d.chapter,
  ]);

  const r = await client.queryArray(
    `INSERT INTO ${table} (verse, study, number, chapter_id, chapter) VALUES ${placeholders}`,
    args,
  );
  inserted += r.rowCount ?? 0;
}
console.log(`inserted ${inserted} rows into ${table}`);

// Release both connections so the script can exit cleanly.
await client.end();
sql.close();

Loading

0 comments on commit 0f81aa3

Please sign in to comment.