diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 0761d81..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.eslintrc b/.eslintrc new file mode 100644 index 0000000..190bccd --- /dev/null +++ b/.eslintrc @@ -0,0 +1,8 @@ +{ + "extends": "airbnb/legacy", + "rules": { + "comma-dangle": [2, "never"], + "func-names": 0, + "no-console": 0 + } +} diff --git a/.gitignore b/.gitignore index d2bc0ea..f7669a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ node_modules xml .env +.DS_Store diff --git a/gulpfile.js b/gulpfile.js index 8e1abc6..30d09cc 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -1,14 +1,13 @@ -var - gulp = require('gulp'), - cheerio = require('gulp-cheerio'), - gimporter = require('./lib/gimporter'), - indexer = require('./lib/indexer'), - elastic = require('./lib/elastic'), - Q = require('q'); +var gulp = require('gulp'); +var cheerio = require('gulp-cheerio'); +var gimporter = require('./lib/gimporter'); +var indexer = require('./lib/indexer'); +var elastic = require('./lib/elastic'); +var eslint = require('gulp-eslint'); gulp.task('import', function () { return gulp - .src(['xml/**/*.*','!xml/**/{POPUP,POPUP/**}']) + .src(['xml/**/*.*', '!xml/**/{POPUP,POPUP/**}']) .pipe(cheerio({ parserOptions: { xmlMode: true @@ -16,18 +15,17 @@ gulp.task('import', function () { run: function ($, file, done) { console.log('Processing: ' + file.path); gimporter.processFile($, file) - .then(function(){ + .then(function () { done(); }); - } - })); + } })); }); -gulp.task('process_template', function() { +gulp.task('process_template', function () { elastic.setTemplate(); }); -gulp.task('index', function() { +gulp.task('index', function () { indexer.perform(); }); @@ -35,8 +33,28 @@ gulp.task('reindex', function () { indexer.reindex(); }); -gulp.task('run', ['import', 'process_template', 'index']); +gulp.task('lint', function () { + return gulp.src(['**/*.js', '!node_modules/**']) + .pipe(eslint()) + .pipe(eslint.format()) + .pipe(eslint.failAfterError()); +}); + +gulp.task('lint-watch', function () { + var lintAndPrint = eslint(); + + lintAndPrint.pipe(eslint.formatEach()); + + return gulp.watch('./lib/**/*.js', function (event) { + if (event.type !== 'deleted') { + gulp.src(event.path) + .pipe(lintAndPrint, { + end: false + }); + } + }); +}); -gulp.task('default', ['import'], function() { +gulp.task('default', ['import'], function () { process.exit(); }); diff --git a/lib/elastic.js b/lib/elastic.js index a23f07b..c6f79ca 100644 --- a/lib/elastic.js +++ b/lib/elastic.js @@ -1,44 +1,47 @@ -var elastic = {}, - elasticsearch = require('elasticsearch'), - Q = require('q'), - TEMPLATE_NAME = "template_gaceta", - template_mapping = require("./template/template_mapping.json"), - client = new elasticsearch.Client({ - host: 'localhost:9200' - }); +var elasticsearch = require('elasticsearch'); +var templateMapping = require('./template/template_mapping.json'); +var Q = require('q'); +var client = {}; +var elastic = {}; +var TEMPLATE_NAME = 'template_gaceta'; + +client = new elasticsearch.Client({ + host: 'localhost:9200' +}); -elastic.deleteTemplate = function() { - var deferred = Q.defer(); +elastic.deleteTemplate = function () { + var deferred = Q.defer(); - client.indices.existsTemplate({ + client.indices.existsTemplate({ + name: TEMPLATE_NAME + }, function () { + client.indices.deleteTemplate({ name: TEMPLATE_NAME - }, function() { - client.indices.deleteTemplate({ - name: TEMPLATE_NAME - }, function(err) { - if (err) { - err; - } - deferred.resolve(); - }); + }, function (err) { + if (err) { + throw err; + } + + deferred.resolve(); }); + }); - return deferred.promise; + return deferred.promise; }; -elastic.setTemplate = function() { - this.deleteTemplate().then(function() { - client.indices.putTemplate({ - create: true, - name: TEMPLATE_NAME, - body: template_mapping - }, function(err) { - if (err) { - throw err; - } - console.log('Template added:', TEMPLATE_NAME); - }); +elastic.setTemplate = function () { + this.deleteTemplate().then(function () { + client.indices.putTemplate({ + create: true, + name: TEMPLATE_NAME, + body: templateMapping + }, function (err) { + if (err) { + throw err; + } + console.log('Template added:', TEMPLATE_NAME); }); + }); }; -module.exports = elastic; \ No newline at end of file +module.exports = elastic; diff --git a/lib/gimporter.js b/lib/gimporter.js index 1137707..d8bdbdb 100644 --- a/lib/gimporter.js +++ b/lib/gimporter.js @@ -1,17 +1,17 @@ -var - mysql = require('mysql'), - extend = require('extend'), - dotenv = require('dotenv'), - Q = require('q'), +var mysql = require('mysql'); +var extend = require('extend'); +var dotenv = require('dotenv'); +var Q = require('q'); - LEGISLACION_CODE = '1', - JURISPRUDENCIA_CODE = '3', +var LEGISLACION_CODE = '1'; +var JURISPRUDENCIA_CODE = '3'; - gimporter = function(){}; +var gimporter = {}; +var connection = {}; dotenv.config(); -var connection = mysql.createConnection({ +connection = mysql.createConnection({ host: process.env.DB_HOST || '127.0.0.1', user: process.env.DB_USER, password: process.env.DB_PASSWORD, @@ -24,97 +24,109 @@ connection.connect(); function unwrap(childrens) { var escapedChildrens = []; - childrens.forEach(function(el){ - if (el.type === 'tag' && el.name === 'a') { - if (el.children) { - var els = unwrap(el.children); - Array.prototype.push.apply(escapedChildrens, els); - } else { - if (el.type === 'tag' && el.name === 'a') { - el.children.forEach(function(_el){ - escapedChildrens.push(el); - }) - } else { - escapedChildrens.push(el); - } - } + + childrens.forEach(function (children) { + var els = []; + var el = children; + + if (el.type === 'tag' && el.name === 'a') { + if (el.children) { + els = unwrap(el.children); + Array.prototype.push.apply(escapedChildrens, els); } else { - escapedChildrens.push(el); - if (el.children) { - el.children = unwrap(el.children); + if (el.type === 'tag' && el.name === 'a') { + el.children.forEach(function (_el) { + escapedChildrens.push(_el); + }); + } else { + escapedChildrens.push(el); } } + } else { + escapedChildrens.push(el); + if (el.children) { + el.children = unwrap(el.children); + } + } }); return escapedChildrens; } -gimporter.updateRecord = function (data,tableName) { - var deferred = Q.defer(); - data.indexado = 'NO'; - data.is_new = 'NO'; +gimporter.updateRecord = function (data, tableName) { + var deferred = Q.defer(); + var insertData = data; + var queryParams = []; - connection.query('UPDATE ?? SET ? WHERE cms_id = ?', [tableName, data, data.cms_id], function (err, rows, fields) { - if (err) { - throw err; - } - deferred.resolve(); - }); + insertData.indexado = 'NO'; + insertData.is_new = 'NO'; + + queryParams = [tableName, insertData, insertData.cms_id]; - return deferred.promise; + connection.query('UPDATE ?? SET ? WHERE cms_id = ?', queryParams, function (err) { + if (err) { + throw err; + } + deferred.resolve(); + }); + + return deferred.promise; }; -gimporter.addRecord = function (data,tableName) { - var deferred = Q.defer(); - connection.query('INSERT INTO ?? SET ?', [tableName, data], function (err, rows, fields) { - if (err) { - throw err; - } - deferred.resolve(); - }); - return deferred.promise; +gimporter.addRecord = function (data, tableName) { + var deferred = Q.defer(); + connection.query('INSERT INTO ?? SET ?', [tableName, data], function (err) { + if (err) { + throw err; + } + deferred.resolve(); + }); + return deferred.promise; }; -gimporter.insertFile = function(data, tableName) { +gimporter.insertFile = function (data, tableName) { var deferred = Q.defer(); var self = this; var baseData = { created_at: new Date(), updated_at: new Date() }; - var q = connection.query('SELECT * FROM ?? WHERE cms_id = ? LIMIT 1', [tableName, data.cms_id], function (err, rows) { + var queryParams = [tableName, data.cms_id]; + + connection.query('SELECT * FROM ?? WHERE cms_id = ? LIMIT 1', queryParams, function (err, rows) { + var data2 = {}; + if (err) { throw err; } - var data2 = extend(data, baseData); - if(rows[0]){ - self.updateRecord(data2,tableName).then(function () { - deferred.resolve(); - }); - - }else{ - self.addRecord(data2,tableName).then(function () { - deferred.resolve(); - }); + data2 = extend(data, baseData); + + if (rows[0]) { + self.updateRecord(data2, tableName).then(function () { + deferred.resolve(); + }); + } else { + self.addRecord(data2, tableName).then(function () { + deferred.resolve(); + }); } }); return deferred.promise; }; -gimporter.getDataFromFile = function($file, file) { +gimporter.getDataFromFile = function ($file, file) { var fileType = $file('view').attr('cms_collection_id'); if (fileType === LEGISLACION_CODE) { return this.getLegislacionData($file, file); - } - else if (fileType === JURISPRUDENCIA_CODE) { + } else if (fileType === JURISPRUDENCIA_CODE) { return this.getJurisprudenciaData($file, file); } - throw 'Archivo con cms_collection_id desconocido'; + throw new Error('Archivo con cms_collection_id desconocido'); }; -gimporter.getLegislacionData = function($file, file) { +gimporter.getLegislacionData = function ($file, file) { var cmsConfigExcludes = ['titulo', 'keywords', 'datos_generales']; var titleEl = $file('view > [cms_config="titulo"]'); var contentHtml = ''; @@ -124,14 +136,13 @@ gimporter.getLegislacionData = function($file, file) { var data = {}; var $view = $file('view'); - $file('version').not(function(i, el){ + $file('version').not(function (i, el) { return cmsConfigExcludes.indexOf(el.attribs.cms_config) > -1; }).each(function (k, v) { + var $el = $file(v); + var childrens = []; - var $el = $file(v), - childrens = []; - - $el.contents().each(function(i, el) { + $el.contents().each(function (i, el) { childrens.push(el); }); @@ -165,7 +176,7 @@ gimporter.getLegislacionData = function($file, file) { return data; }; -gimporter.getJurisprudenciaData = function($file, file) { +gimporter.getJurisprudenciaData = function ($file, file) { var cmsConfigExcludes = ['datos_generales']; var titleEl = $file('view > [cms_config="Sumilla"]'); var contentHtml = ''; @@ -175,13 +186,13 @@ gimporter.getJurisprudenciaData = function($file, file) { var data = {}; var $view = $file('view'); - $file('version').not(function(i, el){ + $file('version').not(function (i, el) { return cmsConfigExcludes.indexOf(el.attribs.cms_config) > -1; }).each(function (k, v) { - var $el = $file(v), - childrens = []; + var $el = $file(v); + var childrens = []; - $el.contents().each(function(i, el) { + $el.contents().each(function (i, el) { childrens.push(el); }); @@ -214,25 +225,24 @@ gimporter.getJurisprudenciaData = function($file, file) { return data; }; -gimporter.getTableName = function($file, file) { +gimporter.getTableName = function ($file) { var fileType = $file('view').attr('cms_collection_id'); if (fileType === LEGISLACION_CODE) { return 'legislations'; - } - else if (fileType === JURISPRUDENCIA_CODE) { + } else if (fileType === JURISPRUDENCIA_CODE) { return 'jurisprudencias'; } - throw 'Archivo con cms_collection_id desconocido'; + throw new Error('Archivo con cms_collection_id desconocido'); }; -gimporter.processFile = function($, file) { +gimporter.processFile = function ($, file) { var deferred = Q.defer(); var data = this.getDataFromFile($, file); + var tableName = this.getTableName($, file); - var tableName = this.getTableName($,file); - this.insertFile(data, tableName).then(function(){ + this.insertFile(data, tableName).then(function () { deferred.resolve(); }); diff --git a/lib/indexer.js b/lib/indexer.js index b0e8bac..385b192 100644 --- a/lib/indexer.js +++ b/lib/indexer.js @@ -1,55 +1,52 @@ -var - elasticsearch = require('elasticsearch'), - mysql = require('mysql'), - Q = require('q'); - - LEGISLACION_TABLE = 'legislations', - JURISPRUDENCIA_TABLE = 'jurisprudencias', - - INDEX_DOCUMENT = 'documento', - - INDEX_INTERVAL = 100, - - FIELD_TITULO_NORMA = 'titulo_norma', - FIELD_CONTENT = 'content', - FIELD_ID = 'id', - - INDEX_FIELDS = [FIELD_ID, FIELD_TITULO_NORMA, FIELD_CONTENT], - DOCUMENT_TABLES = [LEGISLACION_TABLE, JURISPRUDENCIA_TABLE], - - indexer = {}, - DOC_TYPES = {}; - - DOC_TYPES[LEGISLACION_TABLE] = 'legislacion'; - DOC_TYPES[JURISPRUDENCIA_TABLE] = 'jurisprudencias'; - -var connection = mysql.createConnection({ - host : process.env.DB_HOST || '127.0.0.1', - user : process.env.DB_USER, - password : process.env.DB_PASSWORD, - database : process.env.DB_DATABASE, - port : process.env.DB_PORT || 3306, - dateStrings : true +var elasticsearch = require('elasticsearch'); +var mysql = require('mysql'); +var Q = require('q'); + +var indexer = {}; +var DOC_TYPES = {}; +var connection = {}; +var client = {}; + +var LEGISLACION_TABLE = 'legislations'; +var JURISPRUDENCIA_TABLE = 'jurisprudencias'; +var INDEX_DOCUMENT = 'documento'; +var INDEX_INTERVAL = 100; +var FIELD_TITULO_NORMA = 'titulo_norma'; +var FIELD_CONTENT = 'content'; +var FIELD_ID = 'id'; +var INDEX_FIELDS = [FIELD_ID, FIELD_TITULO_NORMA, FIELD_CONTENT]; +var DOCUMENT_TABLES = [LEGISLACION_TABLE, JURISPRUDENCIA_TABLE]; + +DOC_TYPES[LEGISLACION_TABLE] = 'legislacion'; +DOC_TYPES[JURISPRUDENCIA_TABLE] = 'jurisprudencias'; + +connection = mysql.createConnection({ + host: process.env.DB_HOST || '127.0.0.1', + user: process.env.DB_USER, + password: process.env.DB_PASSWORD, + database: process.env.DB_DATABASE, + port: process.env.DB_PORT || 3306, + dateStrings: true }); connection.connect(); -var client = new elasticsearch.Client({ +client = new elasticsearch.Client({ host: 'localhost:9200' }); indexer.indexRows = function (rows, docType) { - var deferred = Q.defer(), - data = []; - - + var deferred = Q.defer(); + var data = []; - rows.map(function(item) { - var content = item.content.trim(), - title = item.titulo_norma.trim(), - id = item.id; + rows.map(function (item) { + var content = item.content.trim(); + var title = item.titulo_norma.trim(); + var id = item.id; + var docData = {}; + var docAction = {}; - var docData = { + docData = { title: title, content: content, suggest: { @@ -62,11 +59,11 @@ indexer.indexRows = function (rows, docType) { } }; - var docAction = { + docAction = { index: { _index: INDEX_DOCUMENT, _type: docType, - _id: id, + _id: id } }; @@ -76,7 +73,7 @@ indexer.indexRows = function (rows, docType) { client.bulk({ body: data - }, function(err, response) { + }, function (err) { if (err) { throw err; } @@ -89,76 +86,81 @@ indexer.indexRows = function (rows, docType) { function temp(from, total, tableName) { var deferred = Q.defer(); + indexer.indexRowsRange(from, tableName) - .then(function() { + .then(function () { + var newFrom; if (from + INDEX_INTERVAL >= total) { deferred.resolve(); } else { - var newFrom = from + INDEX_INTERVAL; + newFrom = from + INDEX_INTERVAL; temp(newFrom, total, tableName); } }); + return deferred.promise; } -function temp_reindex(from, total, tableName){ +function tempReindex(from, total, tableName) { var deferred = Q.defer(); - indexer.indexRowsLeft( tableName) - .then(function() { + + indexer.indexRowsLeft(tableName) + .then(function () { + var newFrom; if (from + INDEX_INTERVAL >= total) { deferred.resolve(); } else { - var newFrom = from + INDEX_INTERVAL; - temp_reindex(newFrom, total, tableName); + newFrom = from + INDEX_INTERVAL; + tempReindex(newFrom, total, tableName); } }); + return deferred.promise; } indexer.indexTable = function (tableName) { - var self = this, - docType = DOC_TYPES[tableName]; + connection.query('SELECT count(*) as total FROM ??', [tableName], function (err, results) { + var from; + var total; - connection.query('SELECT count(*) as total FROM ??', [tableName], function(err, results) { if (err) { throw err; } - var from = 0, - total = results[0].total; + from = 0; + total = results[0].total; - temp(from, total, tableName) - .then(function() { - }); + temp(from, total, tableName); }); }; -indexer.getRowsLeft = function(tableName){ - var self = this, - docType = DOC_TYPES[tableName]; +indexer.getRowsLeft = function (tableName) { + var queryParams = [tableName, 'NO']; - connection.query('SELECT count(*) as total FROM ?? WHERE indexado=?', [tableName,'NO'], function(err, results) { - if (err) { - throw err; - } + connection.query('SELECT count(*) as total FROM ?? WHERE indexado=?', queryParams, + function (err, results) { + var from; + var total; - var from = 0, - total = results[0].total; + if (err) { + throw err; + } - temp_reindex(from, total, tableName) - .then(function() { - }); + from = 0; + total = results[0].total; - }); + tempReindex(from, total, tableName); + }); }; -indexer.indexRowsRange = function(from, tableName) { - var self = this, - deferred = Q.defer(); +indexer.indexRowsRange = function (from, tableName) { + var self = this; + var deferred = Q.defer(); + var queryParams = [INDEX_FIELDS, tableName, from, 100]; - var q = connection.query('SELECT ?? FROM ?? LIMIT ?,?', [INDEX_FIELDS, tableName, from, 100], function (err, rows) { + connection.query('SELECT ?? FROM ?? LIMIT ?,?', queryParams, function (err, rows) { if (err) { throw err; } @@ -170,43 +172,59 @@ indexer.indexRowsRange = function(from, tableName) { }); return deferred.promise; -} +}; -indexer.indexRowsLeft = function( tableName) { - var self = this, - deferred = Q.defer(); +indexer.indexRowsLeft = function (tableName) { + var self = this; + var deferred = Q.defer(); + var indexed = 'NO'; + var queryParams = ['a']; - var q = connection.query('SELECT ?? FROM ?? WHERE indexado = ? LIMIT ?,?', [INDEX_FIELDS, tableName, 'NO', 0, 100], function (err, rows) { - if (err) { - throw err; - } - var ids = rows.map(function(a) {return a.id;}); + queryParams = [INDEX_FIELDS, tableName, indexed, 0, 100]; - self.indexRows(rows, DOC_TYPES[tableName]) - .then(function () { - self.updateRecord(ids,tableName).then(function(){ - deferred.resolve(); - }); + connection.query('SELECT ?? FROM ?? WHERE indexado = ? LIMIT ?,?', queryParams, + function (err, rows) { + var ids; + + if (err) { + throw err; + } + + ids = rows.map(function (a) { + return a.id; }); - }); + + self.indexRows(rows, DOC_TYPES[tableName]) + .then(function () { + self.updateRecord(ids, tableName).then(function () { + deferred.resolve(); + }); + }); + }); return deferred.promise; -} +}; -indexer.updateRecord = function (ids,tableName) { +indexer.updateRecord = function (ids, tableName) { var deferred = Q.defer(); - var data = {indexado: 'SI'}; - var q = connection.query('UPDATE ?? SET ? WHERE id IN (?)', [tableName, data, ids], function (err, rows, fields) { + var data = { + indexado: 'SI' + }; + var queryParams = [tableName, data, ids]; + + connection.query('UPDATE ?? SET ? WHERE id IN (?)', queryParams, + function (err) { if (err) { throw err; } deferred.resolve(); }); - return deferred.promise; + + return deferred.promise; }; -indexer.perform = function() { +indexer.perform = function () { var self = this; DOCUMENT_TABLES.forEach(function (tableName) { @@ -216,9 +234,9 @@ indexer.perform = function() { indexer.reindex = function () { var self = this; - DOCUMENT_TABLES.forEach(function (tableName){ + DOCUMENT_TABLES.forEach(function (tableName) { self.getRowsLeft(tableName); }); -} +}; -module.exports = indexer; \ No newline at end of file +module.exports = indexer; diff --git a/package.json b/package.json index 7a3f06b..8804c4f 100644 --- a/package.json +++ b/package.json @@ -16,5 +16,10 @@ "gulp-cheerio": "^0.6.2", "mysql": "^2.10.2", "q": "^1.4.1" + }, + "devDependencies": { + "eslint": "^1.10.3", + "eslint-config-airbnb": "^4.0.0", + "gulp-eslint": "^1.1.1" } }