From eb391f1273990ede22ecf1d6e728d1a4feb8e633 Mon Sep 17 00:00:00 2001 From: Jason Morgan Date: Fri, 26 Apr 2013 23:05:49 -0400 Subject: [PATCH] Fixed issue with Russian language text processing. Fixed issue #2. There was a problem converting the JSON objects for the post to AlchemyAPI. JSON.stringify caused text in Russian, and probably other languages, to not be properly encoded and to return incorrect results from AlchemyAPI. Added unit test for Russian keyword extraction. --- index.js | 21 ++++++++++++++++----- package.json | 2 +- test/index.js | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/index.js b/index.js index b6d6539..6c2108b 100644 --- a/index.js +++ b/index.js @@ -118,7 +118,8 @@ AlchemyAPI.prototype._doRequest = function(request_query, cb) { }); }); - + //console.dir(req); + //req.setEncoding("utf8"); req.on('socket', function(socket) { socket.on('error', function(err) { //console.log('socket error : ' + err); @@ -131,9 +132,16 @@ AlchemyAPI.prototype._doRequest = function(request_query, cb) { }); if(req.method == "POST") { - //console.log("POSTING"); - //console.log(querystring.stringify(request_query.post)); - req.end(querystring.stringify(request_query.post)); + + /* + Removed this because JSON.stringify was causing issue with unicode characters + //console.log(querystring.stringify(request_query.post)); + */ + if (request_query.post.text) { + req.end("text=" + request_query.post.text); + } else if (request_query.post.html) { + req.end("html=" + request_query.post.text); + } } else { req.end(); } @@ -194,14 +202,16 @@ AlchemyAPI.prototype._getQuery = function(data, opts, method) { query.headers = { 'content-length': '0' } + //console.log("======================1=================="); } else if(!this._htmlCheck(data)){ query.apimethod = "Text" + method; query.post = {text: data}; query.headers = { 'content-length': '' + data.length + '' - ,'content-type': 'application/x-www-form-urlencoded' + ,'content-type': 
'multipart/form-data' }; + //console.log("======================2=================="); } else { query.post = {html: data}; @@ -209,6 +219,7 @@ AlchemyAPI.prototype._getQuery = function(data, opts, method) { 'content-length': '' + data.length + '' ,'content-type': 'application/x-www-form-urlencoded' }; + //console.log("======================3=================="); } query.nice = this._generateNiceUrl(query.url, options, query.apimethod); diff --git a/package.json b/package.json index bfabfa6..7dadeb3 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "alchemy-api", "description": "An Alchemy API library for Node.js", "tags": ["Alchemy", "Natural Language Processing", "util"], - "version": "0.8.8", + "version": "0.8.9", "author": "Jason Morgan ", "contributors": [ ], diff --git a/test/index.js b/test/index.js index f4ff8db..42d564e 100644 --- a/test/index.js +++ b/test/index.js @@ -60,6 +60,26 @@ module.exports = { test.done(); }); }, + 'get russian keywords': function(test) { + var alchemy = new Alchemy(apikey); + alchemy.keywords("http://www.framingeinstein.com/russian.html", {}, function(error, result) { + //console.log(result); + test.ifError(error); + //test.deepEqual(result.status, "OK"); + test.done(); + }); + }, + 'get russian keywords from text': function(test) { + var alchemy = new Alchemy(apikey); + var text = "сервис, который поможет все успеть и ничего не пропустить Создание событий добавьте в календарь напоминание о фильме или концерте или создавайте напоминания о своих делах Оповещение настройте оповещение и календарь предупредит вас Используйте другие службы Яндекса добавляйте события из Телепрограммы или Афиши"; + alchemy.keywords(text, {}, function(error, result) { + //console.log(result); + test.ifError(error); + //test.deepEqual(result.status, "OK"); + test.done(); + }); + }, + 'get category': function(test) { var alchemy = new Alchemy(apikey); alchemy.category(testURL, {}, function(error, result) {