diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bea6102 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ + +########### +# data # +########### +ltp_data + +########### +# outputs # +########### +output +CMakeFiles +cmake_install.cmake +CmakeCache.txt +Makefile +libs +target + +########### +# IDEs # +########### +.idea diff --git a/.gitmodules b/.gitmodules index f87c8e2..3db0bbd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "ltp"] - path = ltp +[submodule "src/main/c++/ltp"] + path = src/main/c++/ltp url = https://github.com/HIT-SCIR/ltp.git diff --git a/.travis.yml b/.travis.yml index fa16c62..3d99f30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,20 +6,10 @@ os: - linux - osx -before_install: - - sudo apt-get install cmake +before_script: + - if [[ "$TRAVIS_OS_NAME" == "osx" && -z "$JAVA_HOME" && -x "/usr/libexec/java_home" ]] ; then export JAVA_HOME=$(/usr/libexec/java_home); fi + - git submodule init - git submodule update - - cd ltp - - ./configure - - make - - cd .. - -install: - - cmake -DLTP_HOME=`pwd`/ltp/ . 
- - make - - ant - - export LD_LIBRARY_PATH=LD_LIBRARY_PATH:`pwd`/libs/:`pwd`/ltp/lib script: - - javac -cp "output/jar/ltp4j.jar" examples/Test.java - - cat examples/example | java -cp "output/jar/ltp4j.jar:examples" Test --segment-model=ltp_data/cws.model --postag-model=ltp_data/pos.model --ner-model=ltp_data/ner.model --parser-model=ltp_data/parser.model --srl-dir=ltp_data/srl/ + - mvn -Dmaven.test.skip=true diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dda73d..020def9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 2.8.0) project ("ltp4j") find_package(JNI) -set (LTP_HOME "/path/to/your/ltp/" CACHE STRING "Use to specified ltp path") +set (LTP_HOME "${PROJECT_SOURCE_DIR}/ltp" CACHE STRING "Use to specified ltp path") # change it to your ltp root set (LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/libs) set (JNI_SOURCE_DIR ${PROJECT_SOURCE_DIR}/jni) diff --git a/README.md b/README.md index d8cc9e4..b632b1b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,12 @@ ltp4jbeta: Language Technology Platform For Java ============================================ [![Build Status](https://travis-ci.org/HIT-SCIR/ltp4j.svg?branch=integrate-test)](https://travis-ci.org/HIT-SCIR/ltp4j) +[![Documentation Status](https://readthedocs.org/projects/ltp/badge/?version=latest)](http://ltp4j.readthedocs.org/en/neoltp4j/?badge=neoltp4j) + +# 更新 + +1. ltp4j 现已经更新对 LTP 3.4.0的支持。 +2. 
项目改用 maven 构建、编译。具体使用方法参见文档。 # 简介 @@ -11,5 +17,5 @@ ltp4j是语言技术平台[(Language Technology Platform, LTP)](https://github.c # 文档 -关于ltp4j的使用,请参考[ltp4j使用文档v1.0](https://github.com/HIT-SCIR/ltp4j/blob/master/doc/ltp4j-document-1.0.md) +请参考在线文档:[ltp4j使用文档](http://ltp4j.readthedocs.io) diff --git a/aol.properties b/aol.properties new file mode 100644 index 0000000..cc42c93 --- /dev/null +++ b/aol.properties @@ -0,0 +1,3 @@ +amd64.Windows.msvc.cpp.defines=Windows WIN32 _WINDOWS NOMINMAX BOOST_ALL_NO_LIB +amd64.Windows.msvc.c.defines=Windows WIN32 _WINDOWS NOMINMAX BOOST_ALL_NO_LIB + diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..4f61774 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,30 @@ +version: '{build}' + +image: + - Visual Studio 2015 + - Visual Studio 2017 + +platform: x64 + +install: + - ps: | + Add-Type -AssemblyName System.IO.Compression.FileSystem + if (!(Test-Path -Path "C:\maven" )) { + (new-object System.Net.WebClient).DownloadFile( + 'http://www.us.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.zip', + 'C:\maven-bin.zip' + ) + [System.IO.Compression.ZipFile]::ExtractToDirectory("C:\maven-bin.zip", "C:\maven") + } + - cmd: SET PATH=C:\maven\apache-maven-3.3.9\bin;%JAVA_HOME%\bin;%PATH% + - cmd: SET MAVEN_OPTS=-XX:MaxPermSize=2g -Xmx4g + - cmd: SET JAVA_OPTS=-XX:MaxPermSize=2g -Xmx4g + +build_script: + - git submodule init + - git submodule update + - mvn -Dmaven.test.skip=true + +cache: + - C:\maven\ + - C:\Users\appveyor\.m2 diff --git a/build.xml b/build.xml deleted file mode 100644 index f6a9d1c..0000000 --- a/build.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/BaseDirectory.png b/doc/BaseDirectory.png deleted file mode 100644 index 80c4675..0000000 Binary files a/doc/BaseDirectory.png and /dev/null differ diff --git a/doc/_static/eclipse.gif b/doc/_static/eclipse.gif new file mode 100644 index 0000000..08bfd3e Binary files /dev/null and 
b/doc/_static/eclipse.gif differ diff --git a/doc/api.rst b/doc/api.rst new file mode 100644 index 0000000..34d7779 --- /dev/null +++ b/doc/api.rst @@ -0,0 +1,413 @@ +编程接口 +======== + +.. java:package:: edu.hit.ir.ltp4j + +分词接口 +-------- + +.. java:type:: public class Segmentor + +分词主要提供三个接口: + +.. java:method:: public final native int create(String modelPath) + + 功能: + + 读取模型文件,初始化分词器。 + + 参数: + + +---------------------+------------------------------------------------------------+ + | 参数名 | 参数描述 | + +=====================+============================================================+ + | String modelPath | 指定模型文件的路径 | + +---------------------+------------------------------------------------------------+ + + +.. java:method:: public final native void release() + + 功能: + + 释放模型文件,销毁分词器。 + +.. java:method:: public final native int segment(String sent, List words) + + 功能: + + 调用分词接口。 + + 参数: + + +---------------------+------------------------------------------------------------+ + | 参数名 | 参数描述 | + +=====================+============================================================+ + | String sent | 待分词句子 | + +---------------------+------------------------------------------------------------+ + | List words | 结果分词序列 | + +---------------------+------------------------------------------------------------+ + + +**示例程序** + +.. 
code:: java + + import java.util.ArrayList; + import java.util.List; + import edu.hit.ir.ltp4j.*; + + public class TestSegment { + public static void main(String[] args) { + if(Segmentor.create("../../../ltp_data/cws.model")<0){ + System.err.println("load failed"); + return; + } + + String sent = "我是中国人"; + List words = new ArrayList(); + int size = Segmentor.segment(sent,words); + + for(int i = 0; i words, List tags) + + 功能: + + 调用词性标注接口 + + 参数: + + +--------------------+--------------------------------------------------------------------+ + | 参数名 | 参数描述 | + +====================+====================================================================+ + | List words | 待标注的词序列 | + +--------------------+--------------------------------------------------------------------+ + | List tags | 词性标注结果,序列中的第i个元素是第i个词的词性 | + +--------------------+--------------------------------------------------------------------+ + +**示例程序** + +.. code:: java + + import java.util.ArrayList; + import java.util.List; + import edu.hit.ir.ltp4j.*; + + public class TestPostag { + public static void main(String[] args) { + if(Postagger.create("../../../ltp_data/pos.model")<0) { + System.err.println("load failed"); + return; + } + + List words= new ArrayList(); + words.add("我"); words.add("是"); + words.add("中国"); words.add("人"); + List postags= new ArrayList(); + + int size = Postagger.postag(words,postags); + for(int i = 0; i < size; i++) { + System.out.print(words.get(i)+"_"+postags.get(i)); + if(i==size-1) { + System.out.println(); + } else { + System.out.print("|"); + } + } + Postagger.release(); + } + } + + +命名实体识别接口 +------------------ + +.. java:type:: public class NER + +命名实体识别主要提供三个接口: + +.. 
java:method:: public final native int create(String modelPath) + + 功能: + + 读取模型文件,初始化命名实体识别器 + + 参数: + + +----------------------------------------+--------------------------------------------------------------------+ + | 参数名 | 参数描述 | + +========================================+====================================================================+ + | String modelPath | 命名实体识别模型路径 | + +----------------------------------------+--------------------------------------------------------------------+ + + 返回值: + + 返回一个整数,返回值小于0表示模型加载失败。 + +.. java:method:: public final native void release() + + 功能: + + 释放模型文件,销毁命名实体识别器。 + + +.. java:method:: public final native int recognize(List words, List postags, List ners) + + 功能: + + 调用命名实体识别接口 + + 参数: + + +----------------------+----------------------------------------------------------------------------------------+ + | 参数名 | 参数描述 | + +======================+========================================================================================+ + | List words | 待识别的词序列 | + +----------------------+----------------------------------------------------------------------------------------+ + | List postags | 待识别的词的词性序列 | + +----------------------+----------------------------------------------------------------------------------------+ + | List ners | | 命名实体识别结果, | + | | | 命名实体识别的结果为O时表示这个词不是命名实体, | + | | | 否则为{POS}-{TYPE}形式的标记,POS代表这个词在命名实体中的位置,TYPE表示命名实体类型 | + +----------------------+----------------------------------------------------------------------------------------+ + + +**示例程序** + +..
code:: java + + import java.util.ArrayList; + import java.util.List; + import edu.hit.ir.ltp4j.*; + + public class TestNer { + + public static void main(String[] args) { + if(NER.create("../../../ltp_data/ner.model")<0) { + System.err.println("load failed"); + return; + } + List words = new ArrayList(); + List tags = new ArrayList(); + List ners = new ArrayList(); + words.add("中国");tags.add("ns"); + words.add("国际");tags.add("n"); + words.add("广播");tags.add("n"); + words.add("电台");tags.add("n"); + words.add("创办");tags.add("v"); + words.add("于");tags.add("p"); + words.add("1941年");tags.add("m"); + words.add("12月");tags.add("m"); + words.add("3日");tags.add("m"); + words.add("。");tags.add("wp"); + + NER.recognize(words, tags, ners); + + for (int i = 0; i < words.size(); i++) { + System.out.println(ners.get(i)); + } + + NER.release(); + + } + } + +依存句法分析接口 +----------------- + +.. java:type:: public class Parser + +依存句法分析主要提供三个接口: + +.. java:method:: public final native int create(String modelPath) + + 功能: + + 读取模型文件,初始化依存句法分析器 + + 参数: + + +---------------------------------------+--------------------------------------------------------------------+ + | 参数名 | 参数描述 | + +=======================================+====================================================================+ + | String modelPath | 依存句法分析模型路径 | + +---------------------------------------+--------------------------------------------------------------------+ + +.. java:method:: public final native void release() + + 功能: + + 释放模型文件,销毁依存句法分析器。 + +.. 
java:method:: public final native int parse(List words, List tags, List heads, List deprels) + + 功能: + + 调用依存句法分析接口 + + 参数: + + +----------------------+--------------------------------------------------------------------+ + | 参数名 | 参数描述 | + +======================+====================================================================+ + | List words | 待分析的词序列 | + +----------------------+--------------------------------------------------------------------+ + | List tags | 待分析的词的词性序列 | + +----------------------+--------------------------------------------------------------------+ + | List heads | 结果依存弧,heads[i]代表第i个词的父亲节点的编号 | + +----------------------+--------------------------------------------------------------------+ + | List deprels | 结果依存弧关系类型 | + +----------------------+--------------------------------------------------------------------+ + + +**示例程序** + +.. code:: java + + import java.util.ArrayList; + import java.util.List; + import edu.hit.ir.ltp4j.*; + + public class TestParse { + + public static void main(String[] args){ + Parser parser = new Parser(); + if(parser.create("./model/ltp_data/parser.model") < 0){ + throw new RuntimeException("fail to load parser model"); + } + List words = new ArrayList<>(); + List postags = new ArrayList<>(); + words.add("一把手"); postags.add("n"); + words.add("亲自"); postags.add("d"); + words.add("过河"); postags.add("v"); + words.add("。"); postags.add("wp"); + + List heads = new ArrayList<>(); + List deprels = new ArrayList<>(); + + parser.parse(words, postags, heads, deprels); + + for(int i=0; i words, List tags, List heads, List deprels, List>>>> srls) + + 功能: + + 调用语义角色标注接口 + + 参数: + + +---------------------------------------------------+-----------------------------------------------------------+ + | 参数名 | 参数描述 | + +===================================================+===========================================================+ + | List words | 输入的词序列 | +
+---------------------------------------------------+-----------------------------------------------------------+ + | List tags | 输入的词性序列 | + +---------------------------------------------------+-----------------------------------------------------------+ + | List heads | 这个词的父节点的编号 [#f1]_ | + +---------------------------------------------------+-----------------------------------------------------------+ + | List deprels | 这个词的父节点的依存关系类型 | + +---------------------------------------------------+-----------------------------------------------------------+ + | List>>>> srls | 结果语义角色标注 | + +---------------------------------------------------+-----------------------------------------------------------+ + +常见问题 +-------- + +.. rubric:: 注 + +.. [#f1] 编号从0记起 \ No newline at end of file diff --git a/doc/background.rst b/doc/background.rst new file mode 100644 index 0000000..d8dbd59 --- /dev/null +++ b/doc/background.rst @@ -0,0 +1,21 @@ +简介与背景知识 +============== + +ltp4j是 `语言技术平台 (Language Technology Platform, LTP) `_ 接口的一个Java封装。 本项目旨在使Java用户可以本地调用LTP。 + +在使用ltp4j之前,您需要简要了解 + +* 什么是语言技术平台,它能否帮助您解决问题 +* 如何安装语言技术平台 +* 语言技术平台提供哪些编程接口 + +如果您对这些问题不了解,请首先阅读我们提供的有关语言技术平台的 `文档 `_ 。在本文档的后续中,我们假定您已经阅读并成功编译并使用语言技术平台。 + + +ltp4j的基本实现思路是依靠JNI技术实现在Java中调用C/C++动态库。我们建议您花几分钟了解 `Java调用C/C++动态库 `_ 的实践方式。 + +ltp4j整个项目由两部分组成,它们分别是: + +* ltp4j.jar:Java接口程序。 +* C++代理程序:ltp4j加载的ltp动态库。 + diff --git a/doc/cmake.png b/doc/cmake.png deleted file mode 100644 index 1e2a532..0000000 Binary files a/doc/cmake.png and /dev/null differ diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..0946661 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +# +# LTP documentation build configuration file, created by +# sphinx-quickstart on Mon Jan 19 17:24:17 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file.
+# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['javasphinx'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'ltp4j' +copyright = u'2016, HIT-SCIR' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1' +# The full version, including alpha/beta/rc tags. +release = '0.1-SNAPSHORT' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'LTPdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). 
+#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'LTP.tex', u'LTP Documentation', + u'HIT-SCIR', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'ltp', u'LTP Documentation', + [u'HIT-SCIR'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'ltp4j', u'ltp4j Documentation', + u'HIT-SCIR', 'ltp4j', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..424b906 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,24 @@ +.. ltpdoctest documentation master file, created by + sphinx-quickstart on Wed Jan 14 22:35:55 2015. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. include:: background.rst + +目录 +===== + +.. toctree:: + :maxdepth: 2 + + install + run + api + + +索引及表格 +================== + +* :ref:`genindex` +* :ref:`search` + diff --git a/doc/install.rst b/doc/install.rst new file mode 100644 index 0000000..05854db --- /dev/null +++ b/doc/install.rst @@ -0,0 +1,78 @@ +.. _install-label: + +编译ltp4j +========= + +如果您需要使用ltp4j,必须拥有两部分内容 + +* ltp4j.jar与C++代理程序 +* LTP模型文件 + +其中,LTP模型文件可以从 `百度云 `_ 下载,当前ltp4j对应的模型版本为3.3.1。本文档将着重介绍如何编译ltp4j.jar与其C++代理程序。 + +安装Maven +--------- + +ltp4j使用 `apache maven `_ 进行构建。在构建ltp4j之前,您首先需要安装maven。安装方法请参考: `安装apache maven `_。 + +编译ltp4j +--------- + +在确保安装maven的前提下(即 `mvn -h` 具有输出结果),您可以按照如下方式构建ltp4j。 + +1. 在命令行下进入ltp4j所在文件夹 +2. `git submodule init` +3. `git submodule update` +4.
`mvn -Dmaven.test.skip=true` + +如果您编译提示成功同时项目根目录下包含 `target/ltp4j-{version}.jar`,证明已经编译成功。 + +nar-maven-plugin +~~~~~~~~~~~~~~~~ + +本部分将介绍编译ltp4j的一些技术考虑,与编译ltp4j无关。对这部分不感兴趣的用户可以忽略这部分文档。 + +ltp4j的基本技术考虑是 **使用户使用最简单的技术手段编译使用ltp4j** 。所以我们选择了maven作为构建工具,希望可以通过一条指令完成编译过程。 +如前文所述,ltp4j需要ltp4j.jar及其C++代理程序两部分。 +为了在maven中既能够使用java编译器编译jar又能够使用C++编译器编译C++代理程序,我们经过调研,决定使用 `nar-maven-plugin `_ 。这一maven插件使我们可以在不同的系统架构下编译C++的代码 (AOL)。 +在使用过程中,我们发现了这一插件的一系列bug,并通过贡献代码的方式进行了解决。 + + +编译结果 +-------- + +nar-maven-plugin的编译结果随操作系统的不同而存在差异。其生成的ltp4j.jar以及代理文件可以从如下路径找到 + +* jar:`./target/ltp4j-{version}.jar` +* 代理程序:`./target/ltp4j-{version}-{AOL}-jni/` + +其中,`version` 代表ltp4j的版本。`AOL` 代表 **体系结构-系统-链接器** 。 +举例来讲, + +* Windows 64位系统使用MSVC编译对应的AOL为:amd64-Windows-msvc +* Ubuntu 64位系统使用gnuc++编译对应的AOL为:amd64-Linux-gpp + +编译结果示例 +~~~~~~~~~~~~ + +**64位Linux g++** + +.. code:: shell + + $ find target/ -type f -name "*.jar" -or -name "*.so" + target/ltp4j-0.1.0-SNAPSHOT.jar + target/nar/ltp4j-0.1.0-SNAPSHOT-amd64-Linux-gpp-jni/lib/amd64-Linux-gpp/jni/libltp4j-0.1.0-SNAPSHOT.so + + +**64位windows MSVC** + +..
code:: shell + + $ find target/ -type f -name "*.jar" -or -name "*.dll" + target/ltp4j-0.1.0-SNAPSHOT.jar + target/nar/ltp4j-0.1.0-SNAPSHOT-amd64-Windows-msvc-jni/lib/amd64-Windows-msvc/jni/ltp4j-0.1.0-SNAPSHOT.dll + + +常见问题 +-------- + diff --git a/doc/ltp4j-document-1.0.md b/doc/ltp4j-document-1.0.md index 37b7a86..09f9cfd 100644 --- a/doc/ltp4j-document-1.0.md +++ b/doc/ltp4j-document-1.0.md @@ -55,6 +55,10 @@ ant 填好后执行run,build/jar下产生名为ltp4j.jar的jar文件。 +### Intellij Idea + +配置maven。点击右侧的MavenProject。导入pom.xml。 + ## 编译C++代理程序 代理程序jni动态库依赖于ltp的动态库,请先行编译LTP。 @@ -551,7 +555,7 @@ edu.ir.hit.ltp4j.SRL public class TestSrl { public static void main(String[] args) { - SRL.create("../../../ltp_data/srl"); + SRL.create("../../../ltp_data/pisrl.model"); ArrayList words = new ArrayList(); words.add("一把手"); words.add("亲自"); @@ -562,11 +566,6 @@ edu.ir.hit.ltp4j.SRL tags.add("d"); tags.add("v"); tags.add("wp"); - ArrayList ners = new ArrayList(); - ners.add("O"); - ners.add("O"); - ners.add("O"); - ners.add("O"); ArrayList heads = new ArrayList(); heads.add(2); heads.add(2); @@ -578,7 +577,7 @@ edu.ir.hit.ltp4j.SRL deprels.add("HED"); deprels.add("WP"); List>>>> srls = new ArrayList>>>>(); - SRL.srl(words, tags, ners, heads, deprels, srls); + SRL.srl(words, tags, heads, deprels, srls); for (int i = 0; i < srls.size(); ++i) { System.out.println(srls.get(i).first + ":"); for (int j = 0; j < srls.get(i).second.size(); ++j) { diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..68179d0 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,242 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . 
+if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. 
+ goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\LTP.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\LTP.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. 
+ echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 0000000..a0101be --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1 @@ +javasphinx diff --git a/doc/run.rst b/doc/run.rst new file mode 100644 index 0000000..e7f730a --- /dev/null +++ b/doc/run.rst @@ -0,0 +1,80 @@ +运行 +==== + +以编译运行examples/Test.java为例。 + +概念 +~~~~ + +**version与aol** + +version是当前ltp4j的版本号,aol是使用ltp4j机器的`架构-系统-链接器` + +在命令行条件下可以用如下命令获得 + +.. code:: shell + + # get version + version=`egrep '<version>' pom.xml | head -1 | tr -d ' ' | sed 's/<version>//g' | sed 's/<\/version>//g'` + + # get aol + aol=`ls target/ltp4j-${version}-*-jni.nar | sed "s/target\/ltp4j-${version}-//g" | sed "s/-jni.nar//g"` + + +在其他条件下可以根据编译步骤生成的nar文件进行判断。 +具体来讲编译步骤生成的nar文件的格式为`target/ltp4j-${version}-${aol}-jni.nar`。 + +**运行jni的必要条件** + +1. 添加ltp4j.jar到java项目的classpath里 +2. 添加c++代理程序的路径到java.library.path里 + +命令行 +~~~~~~ + +..
code:: shell + + # get version + version=`egrep '<version>' pom.xml | head -1 | tr -d ' ' | sed 's/<version>//g' | sed 's/<\/version>//g'` + + # get aol + aol=`ls target/ltp4j-${version}-*-jni.nar | sed "s/target\/ltp4j-${version}-//g" | sed "s/-jni.nar//g"` + + # compile + javac -cp "target/ltp4j-${version}.jar" examples/Test.java + + # run, specifying java.library.path, Test accepts input from stdin + cat examples/example | java -Djava.library.path=target/nar/ltp4j-$version-$aol-jni/lib/$aol/jni/ \ + -cp "target/ltp4j-${version}.jar:examples" Test \ + --segment-model=ltp_data/cws.model \ + --postag-model=ltp_data/pos.model \ + --ner-model=ltp_data/ner.model \ + --parser-model=ltp_data/parser.model \ + --srl-dir=ltp_data/srl/ + +Eclipse +~~~~~~~ + +1. File -> New -> Java Project, 在Project name处填入ltp4jtest +2. 右键examples项目下的src文件夹,在弹出菜单下选择New -> Class,Name处填入Test +3. 将examples/Test.java填入Test中 +4. 右键examples项目下的Properties, + 1. 选择 `Java Build Path` + 2. 选择 `Libraries` 选项卡 + 3. 选择 `Add External JAR...` 选择编译出的ltp4j-$version.jar文件 [添加ltp4j.jar] + 4. 点击 > 箭头展开添加的ltp4j-$version.jar,在Native library location中选择C++代理程序的路径 [添加java.library.path] + +如图所示: + +..
image:: _static/eclipse.gif + +参考: `How to set the java.library.path from Eclipse `_ + +Intellij +~~~~~~~~ + +参考:`How to set the java.library.path in intelliJ Idea `_ + +常见问题 +~~~~~~~~ + diff --git a/examples/Test.java b/examples/Console.java similarity index 70% rename from examples/Test.java rename to examples/Console.java index 212055d..1b82923 100644 --- a/examples/Test.java +++ b/examples/Console.java @@ -9,13 +9,20 @@ import edu.hit.ir.ltp4j.SRL; import edu.hit.ir.ltp4j.Pair; -public class Test { +public class Console { private String segmentModel; private String postagModel; private String NERModel; private String parserModel; private String SRLModel; + private SplitSentence sentenceSplitApp; + private Segmentor segmentorApp; + private Postagger postaggerApp; + private NER nerApp; + private Parser parserApp; + private SRL srlApp; + private boolean ParseArguments(String[] args) { if (args.length == 1 && (args[0].equals("--help") || args[0].equals("-h"))) { Usage(); @@ -31,7 +38,7 @@ private boolean ParseArguments(String[] args) { NERModel = args[i].split("=")[1]; } else if (args[i].startsWith("--parser-model=")) { parserModel = args[i].split("=")[1]; - } else if (args[i].startsWith("--srl-dir=")) { + } else if (args[i].startsWith("--srl-model=")) { SRLModel = args[i].split("=")[1]; } else { throw new IllegalArgumentException("Unknown options " + args[i]); @@ -40,19 +47,30 @@ private boolean ParseArguments(String[] args) { if (segmentModel == null || postagModel == null || NERModel == null || parserModel == null || SRLModel == null) { + Usage(); throw new IllegalArgumentException(""); } - Segmentor.create(segmentModel); - Postagger.create(postagModel); - NER.create(NERModel); - Parser.create(parserModel); - SRL.create(SRLModel); + sentenceSplitApp = new SplitSentence(); + + segmentorApp = new Segmentor(); + segmentorApp.create(segmentModel); + + postaggerApp = new Postagger(); + postaggerApp.create(postagModel); + + nerApp = new NER(); + 
nerApp.create(NERModel); + + parserApp = new Parser(); + parserApp.create(parserModel); + + srlApp = new SRL(); + srlApp.create(SRLModel); return true; } - public void Usage() { System.err.println("An command line example for ltp4j - The Java embedding of LTP"); System.err.println("Sentences are inputted from stdin."); @@ -63,10 +81,9 @@ public void Usage() { System.err.println(" --postag-model= \\"); System.err.println(" --ner-model= \\"); System.err.println(" --parser-model= \\"); - System.err.println(" --srl-dir="); + System.err.println(" --srl-model="); } - private String join(ArrayList payload, String conjunction) { StringBuilder sb = new StringBuilder(); if (payload == null || payload.size() == 0) { @@ -82,9 +99,7 @@ private String join(ArrayList payload, String conjunction) { public void Analyse(String sent) { ArrayList sents = new ArrayList(); - SplitSentence.splitSentence(sent,sents); - - // System.out.println("sents:"+sents.size()); + sentenceSplitApp.splitSentence(sent, sents); for(int m = 0; m < sents.size(); m++) { ArrayList words = new ArrayList(); @@ -95,32 +110,36 @@ public void Analyse(String sent) { List>>>> srls = new ArrayList>>>>(); - System.out.println("#" + (m+1)); + System.out.println("#" + (m + 1)); System.out.println("Sentence : " + sents.get(m)); - Segmentor.segment(sents.get(m), words); + segmentorApp.segment(sents.get(m), words); System.out.println("Segment Result : " + join(words, "\t")); - Postagger.postag(words,postags); - System.out.println("Postag Result : " + join(postags, "\t")); + postaggerApp.postag(words, postags); + System.out.print("Postag Result : "); + System.out.println(join(postags, "\t")); - NER.recognize(words,postags,ners); - System.out.println("NER Result : " + join(ners, "\t")); + nerApp.recognize(words, postags, ners); + System.out.print("NER Result : "); + System.out.println(join(ners, "\t")); - Parser.parse(words,postags,heads,deprels); + parserApp.parse(words, postags, heads, deprels); int size = heads.size(); 
StringBuilder sb = new StringBuilder(); sb.append(heads.get(0)).append(":").append(deprels.get(0)); for(int i = 1; i < heads.size(); i++) { sb.append("\t").append(heads.get(i)).append(":").append(deprels.get(i)); } - System.out.println("Parse Result : " + sb.toString()); + System.out.print("Parse Result : "); + System.out.println(sb.toString()); for (int i = 0; i < heads.size(); i++) { heads.set(i, heads.get(i) - 1); } - SRL.srl(words,postags,ners,heads,deprels,srls); + srlApp.srl(words,postags,heads,deprels,srls); + size = srls.size(); System.out.print("SRL Result : "); if (size == 0) { @@ -138,32 +157,34 @@ public void Analyse(String sent) { } } - public static void release(){ - Segmentor.release(); - Postagger.release(); - NER.release(); - Parser.release(); - SRL.release(); + public void release(){ + segmentorApp.release(); + postaggerApp.release(); + nerApp.release(); + parserApp.release(); + srlApp.release(); } public static void main(String[] args) { - Test test = new Test(); + Console console = new Console(); try { - if (!test.ParseArguments(args)) { + if (!console.ParseArguments(args)) { return; } Scanner input = new Scanner(System.in); String sent; try { - while((sent = input.nextLine())!=null){ - if(sent.length()>0){ - test.Analyse(sent); + System.out.print(">>> "); + while((sent = input.nextLine()) != null) { + if (sent.length() > 0) { + console.Analyse(sent); } + System.out.print(">>> "); } } catch(Exception e) { - release(); + console.release(); } } catch (IllegalArgumentException e) { } diff --git a/jni/edu_hit_ir_ltp4j_NER.h b/jni/edu_hit_ir_ltp4j_NER.h deleted file mode 100644 index d66d4eb..0000000 --- a/jni/edu_hit_ir_ltp4j_NER.h +++ /dev/null @@ -1,37 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_NER */ - -#ifndef _Included_edu_hit_ir_ltp4j_NER -#define _Included_edu_hit_ir_ltp4j_NER -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: edu_hit_ir_ltp4j_NER - * Method: create - * 
Signature: (Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_create - (JNIEnv *, jclass, jstring); - -/* - * Class: edu_hit_ir_ltp4j_NER - * Method: recognize - * Signature: (Ljava/util/List;Ljava/util/List;Ljava/util/List;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize - (JNIEnv *, jclass, jobject, jobject, jobject); - -/* - * Class: edu_hit_ir_ltp4j_NER - * Method: release - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_NER_release - (JNIEnv *, jclass); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/edu_hit_ir_ltp4j_Parser.h b/jni/edu_hit_ir_ltp4j_Parser.h deleted file mode 100644 index 4b59eff..0000000 --- a/jni/edu_hit_ir_ltp4j_Parser.h +++ /dev/null @@ -1,37 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_Parser */ - -#ifndef _Included_edu_hit_ir_ltp4j_Parser -#define _Included_edu_hit_ir_ltp4j_Parser -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: edu_hit_ir_ltp4j_Parser - * Method: create - * Signature: (Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_create - (JNIEnv *, jclass, jstring); - -/* - * Class: edu_hit_ir_ltp4j_Parser - * Method: parse - * Signature: (Ljava/util/List;Ljava/util/List;Ljava/util/List;Ljava/util/List;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse - (JNIEnv *, jclass, jobject, jobject, jobject, jobject); - -/* - * Class: edu_hit_ir_ltp4j_Parser - * Method: release - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Parser_release - (JNIEnv *, jclass); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/edu_hit_ir_ltp4j_Postagger.h b/jni/edu_hit_ir_ltp4j_Postagger.h deleted file mode 100644 index c84646d..0000000 --- a/jni/edu_hit_ir_ltp4j_Postagger.h +++ /dev/null @@ -1,45 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_Postagger */ - -#ifndef 
_Included_edu_hit_ir_ltp4j_Postagger -#define _Included_edu_hit_ir_ltp4j_Postagger -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: edu_hit_ir_ltp4j_Postagger - * Method: create - * Signature: (Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2 - (JNIEnv *, jclass, jstring); - -/* - * Class: edu_hit_ir_ltp4j_Postagger - * Method: create - * Signature: (Ljava/lang/String;Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2Ljava_lang_String_2 - (JNIEnv *, jclass, jstring, jstring); - -/* - * Class: edu_hit_ir_ltp4j_Postagger - * Method: postag - * Signature: (Ljava/util/List;Ljava/util/List;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_postag - (JNIEnv *, jclass, jobject, jobject); - -/* - * Class: edu_hit_ir_ltp4j_Postagger - * Method: release - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Postagger_release - (JNIEnv *, jclass); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/edu_hit_ir_ltp4j_SRL.h b/jni/edu_hit_ir_ltp4j_SRL.h deleted file mode 100644 index f699b2f..0000000 --- a/jni/edu_hit_ir_ltp4j_SRL.h +++ /dev/null @@ -1,37 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_SRL */ - -#ifndef _Included_edu_hit_ir_ltp4j_SRL -#define _Included_edu_hit_ir_ltp4j_SRL -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: edu_hit_ir_ltp4j_SRL - * Method: create - * Signature: (Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_create - (JNIEnv *, jclass, jstring); - -/* - * Class: edu_hit_ir_ltp4j_SRL - * Method: srl - * Signature: (Ljava/util/List;Ljava/util/List;Ljava/util/List;Ljava/util/List;Ljava/util/List;Ljava/util/List;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl - (JNIEnv *, jclass, jobject, jobject, jobject, jobject, jobject, jobject); - -/* - * Class: edu_hit_ir_ltp4j_SRL - * Method: 
release - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SRL_release - (JNIEnv *, jclass); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/edu_hit_ir_ltp4j_Segmentor.h b/jni/edu_hit_ir_ltp4j_Segmentor.h deleted file mode 100644 index b544228..0000000 --- a/jni/edu_hit_ir_ltp4j_Segmentor.h +++ /dev/null @@ -1,45 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_Segmentor */ - -#ifndef _Included_edu_hit_ir_ltp4j_Segmentor -#define _Included_edu_hit_ir_ltp4j_Segmentor -#ifdef __cplusplus -extern "C" { -#endif -/* - * Class: edu_hit_ir_ltp4j_Segmentor - * Method: create - * Signature: (Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2 - (JNIEnv *, jclass, jstring); - -/* - * Class: edu_hit_ir_ltp4j_Segmentor - * Method: create - * Signature: (Ljava/lang/String;Ljava/lang/String;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2Ljava_lang_String_2 - (JNIEnv *, jclass, jstring, jstring); - -/* - * Class: edu_hit_ir_ltp4j_Segmentor - * Method: segment - * Signature: (Ljava/lang/String;Ljava/util/List;)I - */ -JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment - (JNIEnv *, jclass, jstring, jobject); - -/* - * Class: edu_hit_ir_ltp4j_Segmentor - * Method: release - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Segmentor_release - (JNIEnv *, jclass); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/jni/edu_hit_ir_ltp4j_SplitSentence.h b/jni/edu_hit_ir_ltp4j_SplitSentence.h deleted file mode 100644 index 6ea2e37..0000000 --- a/jni/edu_hit_ir_ltp4j_SplitSentence.h +++ /dev/null @@ -1,21 +0,0 @@ -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class edu_hit_ir_ltp4j_SplitSentence */ - -#ifndef _Included_edu_hit_ir_ltp4j_SplitSentence -#define _Included_edu_hit_ir_ltp4j_SplitSentence -#ifdef __cplusplus -extern "C" { 
-#endif -/* - * Class: edu_hit_ir_ltp4j_SplitSentence - * Method: splitSentence - * Signature: (Ljava/lang/String;Ljava/util/List;)V - */ -JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SplitSentence_splitSentence - (JNIEnv *, jclass, jstring, jobject); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/ltp b/ltp deleted file mode 160000 index 7aa6dcf..0000000 --- a/ltp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7aa6dcf78ad2ef5830db3d94192b8ca234ee587b diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..fdfddec --- /dev/null +++ b/pom.xml @@ -0,0 +1,178 @@ + + 4.0.0 + edu.hit.ir.ltp4j + ltp4j + 0.1.0-SNAPSHOT + nar + hitscir-ltp4j + Language Technology Platform for Java + http://github.com/HIT-SCIR/ltp4j + + + UTF-8 + true + + + + + The Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + + + sonatype-nexus-snapshots + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + sonatype-nexus-staging + Nexus Release Repository + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + junit + junit + 4.7 + test + + + + + install + + + maven-dependency-plugin + + + maven-compiler-plugin + + + com.github.maven-nar + nar-maven-plugin + 3.5.0 + true + + + + + + + + + + + + + + + + + + + + + + + + + + + src/main/c++ + + ltp/examples/* + ltp/src/console/* + ltp/src/ltp/* + ltp/src/segmentor/io.cpp + ltp/src/segmentor/otcws.cpp + ltp/src/segmentor/segmentor_frontend.cpp + ltp/src/segmentor/customized_segmentor_frontend.cpp + ltp/src/postagger/io.cpp + ltp/src/postagger/otpos.cpp + ltp/src/postagger/postagger_frontend.cpp + ltp/src/ner/io.cpp + ltp/src/ner/otner.cpp + ltp/src/ner/ner_frontend.cpp + ltp/src/parser/* + ltp/src/parser.n/io.cpp + ltp/src/parser.n/main.cpp + ltp/src/parser.n/parser_frontend.cpp + ltp/src/srl/**/process/* + ltp/src/srl/*/pred.cpp + ltp/src/srl/*/train.cpp + ltp/src/srl/tool/merge.cpp + ltp/src/server/* + ltp/src/xml4nlp/* + ltp/src/unittest/* + 
ltp/thirdparty/eigen/unsupported/**/* + ltp/thirdparty/dynet/dynet/cuda.cc + ltp/thirdparty/tinyxml/* + ltp/thirdparty/tinythreadpp/* + ltp/thirdparty/maxent/train.cpp + ltp/thirdparty/maxent/predict.cpp + ltp/thirdparty/gtest/**/* + + + src/main/c++/ltp/src + src/main/c++/ltp/src/srl + src/main/c++/ltp/src/srl/common + src/main/c++/ltp/src/srl/include + src/main/c++/ltp/src/utils + src/main/c++/ltp/thirdparty/boost/include + src/main/c++/ltp/thirdparty/maxent + src/main/c++/ltp/thirdparty/eigen + src/main/c++/ltp/thirdparty/dynet + src/main/c++/ltp/thirdparty/jsoncpp/include + + false + + + + + + + + + + + **/* + + + + + jni + edu.hit.ir.ltp4j + false + + + + + + + + + + Segmentor + Postagger + NER + Parser + SRL + + + + + + + diff --git a/src/edu/hit/ir/ltp4j/NER.java b/src/edu/hit/ir/ltp4j/NER.java deleted file mode 100644 index fdfd546..0000000 --- a/src/edu/hit/ir/ltp4j/NER.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.hit.ir.ltp4j; -import java.util.List; - -public class NER { - static { - System.loadLibrary("ner_jni"); - } - public static native int create(String modelPath); - - public static native int recognize(List words, - List postags, List ners); - - public static native void release(); - -} - diff --git a/src/edu/hit/ir/ltp4j/Parser.java b/src/edu/hit/ir/ltp4j/Parser.java deleted file mode 100644 index 850e4c1..0000000 --- a/src/edu/hit/ir/ltp4j/Parser.java +++ /dev/null @@ -1,17 +0,0 @@ -package edu.hit.ir.ltp4j; -import java.util.List; - -public class Parser { - - static { - System.loadLibrary("parser_jni"); - } - public static native int create(String modelPath); - - public static native int parse(List words, - List tags, List heads, - List deprels); - - public static native void release(); -} - diff --git a/src/edu/hit/ir/ltp4j/Postagger.java b/src/edu/hit/ir/ltp4j/Postagger.java deleted file mode 100644 index 9e82428..0000000 --- a/src/edu/hit/ir/ltp4j/Postagger.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.hit.ir.ltp4j; -import 
java.util.List; - -public class Postagger { - static { - System.loadLibrary("postagger_jni"); - } - - public static native int create(String modelPath); - public static native int create(String modelPath, String lexiconPath); - public static native int postag(List words, - List tags); - public static native void release(); - -} - diff --git a/src/edu/hit/ir/ltp4j/Segmentor.java b/src/edu/hit/ir/ltp4j/Segmentor.java deleted file mode 100644 index 45914e2..0000000 --- a/src/edu/hit/ir/ltp4j/Segmentor.java +++ /dev/null @@ -1,14 +0,0 @@ -package edu.hit.ir.ltp4j; -import java.util.List; - - -public class Segmentor { - static { - System.loadLibrary("segmentor_jni"); - } - public static native int create(String modelPath); - public static native int create(String modelPath,String lexiconPath); - public static native int segment(String sent,List words); - public static native void release(); -} - diff --git a/src/edu/hit/ir/ltp4j/SplitSentence.java b/src/edu/hit/ir/ltp4j/SplitSentence.java deleted file mode 100644 index 566afe8..0000000 --- a/src/edu/hit/ir/ltp4j/SplitSentence.java +++ /dev/null @@ -1,9 +0,0 @@ -package edu.hit.ir.ltp4j; -import java.util.List; - -public class SplitSentence{ - static { - System.loadLibrary("split_sentence_jni"); - } - public static native void splitSentence(String sent,List sents); -} diff --git a/jni/ner_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_NER.cpp similarity index 77% rename from jni/ner_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_NER.cpp index e6988b6..e3eb20e 100644 --- a/jni/ner_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_NER.cpp @@ -1,30 +1,32 @@ +#include "ner/ner_dll.h" #include "edu_hit_ir_ltp4j_NER.h" +#include "string_to_jstring.hpp" #include #include -#include "ltp/ner_dll.h" -#include "string_to_jstring.hpp" +#include static void * ner = NULL; JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_create -(JNIEnv * env, jclass obj, jstring model_path){ + (JNIEnv * env, jobject obj, jstring model_path) { const char * str = 
env->GetStringUTFChars( model_path , 0); - if(!ner){ + + if (!ner) { ner = ner_create_recognizer(str); - } - else{ + } else { ner_release_recognizer(ner); ner = ner_create_recognizer(str); } + env->ReleaseStringUTFChars( model_path, str); - if(ner) { + if (ner) { return 1; } return -1; } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize -(JNIEnv * env, jclass obj, jobject array_words, jobject array_tags, jobject array_ners){ + (JNIEnv * env, jobject obj, jobject array_words, jobject array_tags, jobject array_ners) { jclass array_list = env->GetObjectClass(array_words); @@ -35,23 +37,22 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize std::vector words,tags,ners; int size_words = env->CallIntMethod(array_words,list_size); - int size_tags = env->CallIntMethod(array_tags,list_size); - if(size_words!=size_tags){ + if (size_words!=size_tags) { return 0; } - for(int i = 0;iCallObjectMethod(array_words,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); std::string s_s(st); words.push_back(s_s); - env->ReleaseStringUTFChars( s, st); + env->ReleaseStringUTFChars( s, st); } - for(int i = 0;iCallObjectMethod(array_tags,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); @@ -62,15 +63,15 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_NER_recognize int len = ner_recognize(ner,words,tags,ners); - for(int i = 0;iCallBooleanMethod(array_ners,list_add,tmp); } return len; } JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_NER_release -(JNIEnv * env, jclass obj){ + (JNIEnv * env, jobject obj) { ner_release_recognizer(ner); ner = NULL; } diff --git a/jni/parser_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_Parser.cpp similarity index 64% rename from jni/parser_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_Parser.cpp index 00c7ee9..a427876 100644 --- a/jni/parser_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_Parser.cpp @@ -1,35 +1,41 @@ #include "edu_hit_ir_ltp4j_Parser.h" -#include 
"ltp/parser_dll.h" +#include "parser/parser_dll.h" #include "string_to_jstring.hpp" #include #include #include +#include static void * parser = NULL; JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_create -(JNIEnv * env, jclass obj, jstring model_path){ + (JNIEnv * env, jobject obj, jstring model_path) { + const char * str = env->GetStringUTFChars( model_path , 0); + if(!parser){ parser = parser_create_parser(str); } - env->ReleaseStringUTFChars( model_path, str); + + env->ReleaseStringUTFChars( model_path, str); + if(parser) { return 1; } + return -1; } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse -(JNIEnv * env, jclass obj, jobject array_words, jobject array_tags, jobject array_heads, jobject array_deprels){ + (JNIEnv * env, jobject obj, jobject array_words, jobject array_tags, jobject array_heads, jobject array_deprels) { jclass array_list = env->GetObjectClass(array_words); jclass integer = env->FindClass("java/lang/Integer"); - jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z"); - jmethodID list_get = env->GetMethodID(array_list,"get","(I)Ljava/lang/Object;"); - jmethodID list_size = env->GetMethodID(array_list,"size","()I"); - jmethodID integer_init =env->GetMethodID(integer,"","(I)V"); + jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z"); + jmethodID list_get = env->GetMethodID(array_list, "get", "(I)Ljava/lang/Object;"); + jmethodID list_size = env->GetMethodID(array_list, "size", "()I"); + jmethodID integer_init =env->GetMethodID(integer, "", "(I)V"); std::vector words,tags,deprels; std::vector heads; @@ -37,20 +43,20 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse int size_words = env->CallIntMethod(array_words,list_size); int size_tags = env->CallIntMethod(array_tags,list_size); - if(size_words!=size_tags) { + if (size_words!=size_tags) { return -1; } - for(int i = 0;iCallObjectMethod(array_words,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = 
env->GetStringUTFChars(s,0); std::string s_s(st); words.push_back(s_s); - env->ReleaseStringUTFChars( s, st); + env->ReleaseStringUTFChars( s, st); } - for(int i = 0;iCallObjectMethod(array_tags,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); @@ -60,18 +66,18 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse } int len = parser_parse(parser,words,tags,heads,deprels); - if(len<0) - { + + if (len < 0) { return -1; } - int size = heads.size(); - for(int i = 0;iNewObject(integer,integer_init,heads.at(i)); env->CallBooleanMethod(array_heads,list_add, integer_object); } - for(int i = 0;iCallBooleanMethod(array_deprels,list_add,tmp); } @@ -80,10 +86,8 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Parser_parse } JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Parser_release -(JNIEnv * env, jclass obj){ - parser_release_parser(parser); - parser = NULL; + (JNIEnv * env, jobject obj) { + parser_release_parser(parser); + parser = NULL; } - - diff --git a/jni/postag_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp similarity index 58% rename from jni/postag_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp index 65586f2..e3ba6eb 100644 --- a/jni/postag_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_Postagger.cpp @@ -1,42 +1,48 @@ +#include "postagger/postag_dll.h" #include "edu_hit_ir_ltp4j_Postagger.h" +#include "string_to_jstring.hpp" #include #include -#include "ltp/postag_dll.h" #include -#include "string_to_jstring.hpp" +#include static void * postagger = NULL; JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2 -(JNIEnv * env, jclass obj, jstring model_path){ - const char * str = env->GetStringUTFChars( model_path , 0); + (JNIEnv * env, jobject obj, jstring model_path) { + const char* str = env->GetStringUTFChars( model_path , 0); + if(!postagger){ postagger = postagger_create_postagger(str); - } - else { + } else { postagger_release_postagger(postagger); postagger = 
postagger_create_postagger(str); } - env->ReleaseStringUTFChars( model_path, str); - if(postagger) { + + env->ReleaseStringUTFChars( model_path, str); + + if (postagger) { return 1; } return -1; } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String_2Ljava_lang_String_2 -(JNIEnv * env, jclass obj, jstring model_path, jstring lexicon_path){ + (JNIEnv * env, jobject obj, jstring model_path, jstring lexicon_path) { + const char * model = env->GetStringUTFChars( model_path , 0); - const char * lexicon = env->GetStringUTFChars( model_path , 0); + const char * lexicon = env->GetStringUTFChars( lexicon_path , 0); + if(!postagger){ postagger = postagger_create_postagger(model,lexicon); - } - else { + } else { postagger_release_postagger(postagger); postagger = postagger_create_postagger(model,lexicon); } - env->ReleaseStringUTFChars( model_path, model); - env->ReleaseStringUTFChars( lexicon_path, lexicon); + + env->ReleaseStringUTFChars( model_path, model); + env->ReleaseStringUTFChars( lexicon_path, lexicon); + if(postagger) { return 1; } @@ -45,29 +51,29 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_create__Ljava_lang_String JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_postag -(JNIEnv * env, jclass, jobject array_words, jobject array_postags){ + (JNIEnv * env, jobject obj, jobject array_words, jobject array_postags) { jclass array_list = env->GetObjectClass(array_words); - jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z"); - jmethodID list_get = env->GetMethodID(array_list,"get","(I)Ljava/lang/Object;"); - jmethodID list_size = env->GetMethodID(array_list,"size","()I"); + jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z"); + jmethodID list_get = env->GetMethodID(array_list, "get", "(I)Ljava/lang/Object;"); + jmethodID list_size = env->GetMethodID(array_list, "size", "()I"); - std::vector words,postags; + std::vector words, postags; int size = 
env->CallIntMethod(array_words,list_size); - for(int i = 0;iCallObjectMethod(array_words,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); std::string s_s(st); words.push_back(s_s); - env->ReleaseStringUTFChars( s, st); + env->ReleaseStringUTFChars( s, st); } int len = postagger_postag(postagger,words,postags); - for(int i = 0;iCallBooleanMethod(array_postags,list_add,tmp); } @@ -76,9 +82,9 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Postagger_postag } JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Postagger_release -(JNIEnv * env, jclass obj){ - postagger_release_postagger(postagger); - postagger = NULL; + (JNIEnv * env, jobject obj) { + postagger_release_postagger(postagger); + postagger = NULL; } diff --git a/jni/srl_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_SRL.cpp similarity index 61% rename from jni/srl_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_SRL.cpp index b2ee348..780bc2e 100644 --- a/jni/srl_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_SRL.cpp @@ -1,15 +1,16 @@ #include "edu_hit_ir_ltp4j_SRL.h" -#include "ltp/SRL_DLL.h" +#include "srl/SRL_DLL.h" #include "string_to_jstring.hpp" #include #include #include +#include JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_create -(JNIEnv * env, jclass obj, jstring model_path){ + (JNIEnv * env, jobject obj, jstring model_path){ const char * str = env->GetStringUTFChars( model_path , 0); std::string path(str); - int tag = SRL_LoadResource(path); + int tag = srl_load_resource(path); env->ReleaseStringUTFChars( model_path, str); if(0==tag) { return 1; @@ -18,38 +19,45 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_create } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl -(JNIEnv * env, jclass obj, jobject array_words, jobject array_tags, jobject array_ners, jobject array_heads, jobject array_deprels, jobject srl_result){ - +(JNIEnv * env, jclass obj, jobject array_words, jobject array_tags, jobject array_heads, jobject array_deprels, jobject srl_result){ 
jclass array_list = env->GetObjectClass(array_words); jmethodID list_construct = env->GetMethodID(array_list,"","()V"); - jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z"); - jmethodID list_get = env->GetMethodID(array_list,"get","(I)Ljava/lang/Object;"); - jmethodID list_size = env->GetMethodID(array_list,"size","()I"); + jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z"); + jmethodID list_get = env->GetMethodID(array_list, "get", "(I)Ljava/lang/Object;"); + jmethodID list_size = env->GetMethodID(array_list, "size", "()I"); jclass integer = env->FindClass("java/lang/Integer"); jmethodID integer_construct =env->GetMethodID(integer,"","(I)V"); jmethodID integer_int =env->GetMethodID(integer,"intValue","()I"); jclass pair = env->FindClass("edu/hit/ir/ltp4j/Pair"); - jmethodID pair_construct = env->GetMethodID(pair,"","(Ljava/lang/Object;Ljava/lang/Object;)V"); + jmethodID pair_construct = env->GetMethodID(pair, "", + "(Ljava/lang/Object;Ljava/lang/Object;)V"); - std::vector words,tags,ners,deprels; + std::vector words,tags,deprels; std::vector heads; std::vector > parsers; - std::vector< std::pair< int, std::vector< std::pair > > > > srls; - - int size_words = env->CallIntMethod(array_words,list_size); - for(int i = 0;i > + > + > + > srls; + + unsigned size_words = env->CallIntMethod(array_words, list_size); + for(unsigned i = 0; i < size_words; i++){ jobject tmp = env->CallObjectMethod(array_words,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); std::string s_s(st); words.push_back(s_s); - env->ReleaseStringUTFChars( s, st); + env->ReleaseStringUTFChars( s, st); } - int size_tags = env->CallIntMethod(array_tags,list_size); - for(int i = 0;iCallIntMethod(array_tags, list_size); + for(unsigned i = 0;iCallObjectMethod(array_tags,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); @@ -58,25 +66,16 @@ JNIEXPORT jint 
JNICALL Java_edu_hit_ir_ltp4j_SRL_srl env->ReleaseStringUTFChars( s, st); } - int size_ners = env->CallIntMethod(array_ners,list_size); - for(int i = 0;iCallObjectMethod(array_ners,list_get,i); - jstring s = reinterpret_cast (tmp); - const char * st = env->GetStringUTFChars(s,0); - std::string s_s(st); - ners.push_back(s_s); - env->ReleaseStringUTFChars( s, st); - } - int size_heads = env->CallIntMethod(array_heads,list_size); - for(int i = 0;iCallIntMethod(array_heads,list_size); + for(unsigned i = 0; i < size_heads; i++){ jobject tmp = env->CallObjectMethod(array_heads,list_get,i); int digit = env->CallIntMethod(tmp,integer_int); heads.push_back(digit); } - int size_deprels = env->CallIntMethod(array_deprels,list_size); - for(int i = 0;iCallIntMethod(array_deprels,list_size); + for(unsigned i = 0;iCallObjectMethod(array_deprels,list_get,i); jstring s = reinterpret_cast (tmp); const char * st = env->GetStringUTFChars(s,0); @@ -85,20 +84,20 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl env->ReleaseStringUTFChars( s, st); } - for(int i = 0;iNewObject(integer,integer_construct,srls[i].first); jobject args = env->NewObject(array_list,list_construct); - for(int j = 0;jNewObject(integer,integer_construct,srls[i].second[j].second.first); jobject end = env->NewObject(integer,integer_construct,srls[i].second[j].second.second); @@ -113,11 +112,11 @@ JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_SRL_srl env->CallBooleanMethod(srl_result,list_add,outer); } - return srls.size(); + return (int)srls.size(); } JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SRL_release -(JNIEnv * env, jclass obj){ - SRL_ReleaseResource(); + (JNIEnv * env, jobject obj) { + srl_release_resource(); } diff --git a/jni/segment_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp similarity index 55% rename from jni/segment_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp index 54b6802..baa0f7c 100644 --- a/jni/segment_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_Segmentor.cpp @@ 
-1,47 +1,48 @@ -// segmentor_jni.cpp : Defines the exported functions for the DLL application. -// - -#include "ltp/segment_dll.h" +#include "segmentor/segment_dll.h" #include "edu_hit_ir_ltp4j_Segmentor.h" #include "string_to_jstring.hpp" #include #include #include -using namespace std; - -static void * segmentor = NULL; +static void* segmentor = NULL; JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2 -(JNIEnv * env, jclass obj, jstring model_path){ - const char * str = env->GetStringUTFChars( model_path , 0); + (JNIEnv* env, jobject obj, jstring model_path) { + const char* str = env->GetStringUTFChars( model_path , 0); + if(!segmentor){ segmentor = segmentor_create_segmentor(str); - } - else{ + } else{ segmentor_release_segmentor(segmentor); segmentor = segmentor_create_segmentor(str); } - env->ReleaseStringUTFChars( model_path, str); + + env->ReleaseStringUTFChars( model_path, str); + if(segmentor) { return 1; } + return -1; } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String_2Ljava_lang_String_2 -(JNIEnv * env, jclass obj, jstring model_path, jstring lexicon_path){ - const char * str_model = env->GetStringUTFChars( model_path , 0); - const char * str_lexicon = env->GetStringUTFChars( lexicon_path , 0); + (JNIEnv* env, jobject obj, jstring model_path, jstring lexicon_path) { + + const char* str_model = env->GetStringUTFChars( model_path , 0); + const char* str_lexicon = env->GetStringUTFChars( lexicon_path , 0); + if(!segmentor){ segmentor = segmentor_create_segmentor(str_model,str_lexicon); - } - else{ + } else{ segmentor_release_segmentor(segmentor); segmentor = segmentor_create_segmentor(str_model,str_lexicon); } - env->ReleaseStringUTFChars( model_path, str_model); - env->ReleaseStringUTFChars( lexicon_path, str_lexicon); + + env->ReleaseStringUTFChars( model_path, str_model); + env->ReleaseStringUTFChars( lexicon_path, str_lexicon); + if(segmentor) { return 1; } @@ -49,26 +50,27 @@ JNIEXPORT 
jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_create__Ljava_lang_String } JNIEXPORT jint JNICALL Java_edu_hit_ir_ltp4j_Segmentor_segment -(JNIEnv * env, jclass obj, jstring sent, jobject array_words){ - jclass array_list = env->GetObjectClass(array_words); + (JNIEnv* env, jobject obj, jstring sent, jobject array_words) { - jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z"); + jclass array_list = env->GetObjectClass(array_words); + jmethodID list_add = env->GetMethodID(array_list, "add", "(Ljava/lang/Object;)Z"); - const char * str_sent = env->GetStringUTFChars( sent , 0); + const char* str_sent = env->GetStringUTFChars( sent , 0); std::string sentence(str_sent); std::vector words; - int len = segmentor_segment(segmentor,sentence,words); - for(int i = 0;iCallBooleanMethod(array_words,list_add,tmp); } - env->ReleaseStringUTFChars(sent, str_sent); + env->ReleaseStringUTFChars(sent, str_sent); return len; } JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_Segmentor_release -(JNIEnv * env, jclass obj){ + (JNIEnv* env, jobject obj) { segmentor_release_segmentor(segmentor); segmentor = NULL; } diff --git a/jni/split_sentence_jni.cpp b/src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp similarity index 79% rename from jni/split_sentence_jni.cpp rename to src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp index b82d7cd..cde1623 100644 --- a/jni/split_sentence_jni.cpp +++ b/src/main/c++/edu_hit_ir_ltp4j_SplitSentence.cpp @@ -1,27 +1,28 @@ #include "edu_hit_ir_ltp4j_SplitSentence.h" -#include "ltp/SplitSentence.h" +#include "splitsnt/SplitSentence.h" #include "string_to_jstring.hpp" #include #include #include +#include using namespace std; JNIEXPORT void JNICALL Java_edu_hit_ir_ltp4j_SplitSentence_splitSentence - (JNIEnv * env, jclass obj, jstring sent, jobject array_sents){ + (JNIEnv * env, jobject obj, jstring sent, jobject array_sents){ const char * str = env->GetStringUTFChars(sent,0); string s_s(str); jclass array_list = 
env->GetObjectClass(array_sents); - jmethodID list_add = env->GetMethodID(array_list,"add","(Ljava/lang/Object;)Z"); vector sents; SplitSentence(s_s,sents); - for(int i = 0;iCallBooleanMethod(array_sents,list_add,tmp); } + env->ReleaseStringUTFChars(sent,str); } diff --git a/src/main/c++/ltp b/src/main/c++/ltp new file mode 160000 index 0000000..a938aa2 --- /dev/null +++ b/src/main/c++/ltp @@ -0,0 +1 @@ +Subproject commit a938aa27c3a1fd9e95ef3f0ee55ac1aabbee6d16 diff --git a/jni/string_to_jstring.hpp b/src/main/c++/string_to_jstring.hpp similarity index 71% rename from jni/string_to_jstring.hpp rename to src/main/c++/string_to_jstring.hpp index 238fda2..21b5deb 100644 --- a/jni/string_to_jstring.hpp +++ b/src/main/c++/string_to_jstring.hpp @@ -1,11 +1,12 @@ #include #include +#include inline jstring stringToJstring(JNIEnv* env, const char* pat) { jclass strClass = env->FindClass("Ljava/lang/String;"); jmethodID ctorID = env->GetMethodID(strClass, "", "([BLjava/lang/String;)V"); - jbyteArray bytes = env->NewByteArray(strlen(pat)); - env->SetByteArrayRegion(bytes, 0, strlen(pat), (jbyte*)pat); + jbyteArray bytes = env->NewByteArray( (jsize)strlen(pat) ); + env->SetByteArrayRegion(bytes, 0, (jsize)strlen(pat), (jbyte*)pat); jstring encoding = env->NewStringUTF("utf-8"); return (jstring)env->NewObject(strClass, ctorID, bytes, encoding); } diff --git a/src/main/java/edu/hit/ir/ltp4j/NER.java b/src/main/java/edu/hit/ir/ltp4j/NER.java new file mode 100644 index 0000000..f59d910 --- /dev/null +++ b/src/main/java/edu/hit/ir/ltp4j/NER.java @@ -0,0 +1,14 @@ +package edu.hit.ir.ltp4j; +import java.util.List; + +public class NER { + static { + NarSystem.loadLibrary(); + } + + public final native int create(String modelPath); + public final native int recognize(List words, + List postags, List ners); + public final native void release(); +} + diff --git a/src/edu/hit/ir/ltp4j/Pair.java b/src/main/java/edu/hit/ir/ltp4j/Pair.java similarity index 100% rename from 
src/edu/hit/ir/ltp4j/Pair.java rename to src/main/java/edu/hit/ir/ltp4j/Pair.java diff --git a/src/main/java/edu/hit/ir/ltp4j/Parser.java b/src/main/java/edu/hit/ir/ltp4j/Parser.java new file mode 100644 index 0000000..d5ff436 --- /dev/null +++ b/src/main/java/edu/hit/ir/ltp4j/Parser.java @@ -0,0 +1,15 @@ +package edu.hit.ir.ltp4j; +import java.util.List; + +public class Parser { + static { + NarSystem.loadLibrary(); + } + + public final native int create(String modelPath); + public final native int parse(List words, + List tags, List heads, + List deprels); + public final native void release(); +} + diff --git a/src/main/java/edu/hit/ir/ltp4j/Postagger.java b/src/main/java/edu/hit/ir/ltp4j/Postagger.java new file mode 100644 index 0000000..b2790ee --- /dev/null +++ b/src/main/java/edu/hit/ir/ltp4j/Postagger.java @@ -0,0 +1,15 @@ +package edu.hit.ir.ltp4j; +import java.util.List; + +public class Postagger { + static { + NarSystem.loadLibrary(); + } + + public final native int create(String modelPath); + public final native int create(String modelPath, String lexiconPath); + public final native int postag(List words, + List tags); + public final native void release(); +} + diff --git a/src/edu/hit/ir/ltp4j/SRL.java b/src/main/java/edu/hit/ir/ltp4j/SRL.java similarity index 59% rename from src/edu/hit/ir/ltp4j/SRL.java rename to src/main/java/edu/hit/ir/ltp4j/SRL.java index 927af76..fbe96de 100644 --- a/src/edu/hit/ir/ltp4j/SRL.java +++ b/src/main/java/edu/hit/ir/ltp4j/SRL.java @@ -3,18 +3,16 @@ public class SRL { static { - System.loadLibrary("srl_jni"); + NarSystem.loadLibrary(); } - public static native int create(String modelPath); - public static native int srl( + public final native int create(String modelPath); + public final native int srl( List words, List tags, - List ners, List heads, List deprels, List>>>> srls); - - public static native void release(); + public final native void release(); } diff --git a/src/main/java/edu/hit/ir/ltp4j/Segmentor.java 
b/src/main/java/edu/hit/ir/ltp4j/Segmentor.java new file mode 100644 index 0000000..01f5cf2 --- /dev/null +++ b/src/main/java/edu/hit/ir/ltp4j/Segmentor.java @@ -0,0 +1,14 @@ +package edu.hit.ir.ltp4j; +import java.util.List; + +public class Segmentor { + static { + NarSystem.loadLibrary(); + } + + public final native int create(String modelPath); + public final native int create(String modelPath, String lexiconPath); + public final native int segment(String sent, List words); + public final native void release(); +} + diff --git a/src/main/java/edu/hit/ir/ltp4j/SplitSentence.java b/src/main/java/edu/hit/ir/ltp4j/SplitSentence.java new file mode 100644 index 0000000..93c55ff --- /dev/null +++ b/src/main/java/edu/hit/ir/ltp4j/SplitSentence.java @@ -0,0 +1,10 @@ +package edu.hit.ir.ltp4j; +import java.util.List; + +public class SplitSentence{ + static { + NarSystem.loadLibrary(); + } + + public final native void splitSentence(String sent,List sents); +} diff --git a/src/test/c++/main.cpp b/src/test/c++/main.cpp new file mode 100644 index 0000000..d8810f8 --- /dev/null +++ b/src/test/c++/main.cpp @@ -0,0 +1,5 @@ +#include + +int main(int argc, char* argv[]) { + return 0; +} diff --git a/src/test/java/edu/hit/ir/ltp4j/test/NERTest.java b/src/test/java/edu/hit/ir/ltp4j/test/NERTest.java new file mode 100644 index 0000000..bdf3df9 --- /dev/null +++ b/src/test/java/edu/hit/ir/ltp4j/test/NERTest.java @@ -0,0 +1,48 @@ +package edu.hit.ir.ltp4j.test; + +import edu.hit.ir.ltp4j.NER; +import java.util.List; +import java.util.ArrayList; +import org.junit.Assert; +import org.junit.Test; + +public class NERTest { + @Test public final void testNERCreate() + throws Exception + { + NER app = new NER(); + Assert.assertEquals( 1, app.create("ltp_data/ner.model") ); + } + + @Test public final void testNERRecognize() + throws Exception + { + NER app = new NER(); + app.create("ltp_data/ner.model"); + + List words = new ArrayList(); + List tags = new ArrayList(); + + words.add("中国"); 
tags.add("ns"); + words.add("进出口"); tags.add("n"); + words.add("银行"); tags.add("n"); + words.add("与"); tags.add("p"); + words.add("中国"); tags.add("ns"); + words.add("银行"); tags.add("n"); + words.add("加强"); tags.add("v"); + words.add("合作"); tags.add("v"); + + List result = new ArrayList(); + app.recognize(words, tags, result); + + Assert.assertEquals( 8, result.size() ); + Assert.assertEquals( "B-Ni", result.get(0) ); + Assert.assertEquals( "I-Ni", result.get(1) ); + Assert.assertEquals( "E-Ni", result.get(2) ); + Assert.assertEquals( "O", result.get(3) ); + Assert.assertEquals( "B-Ni", result.get(4) ); + Assert.assertEquals( "E-Ni", result.get(5) ); + Assert.assertEquals( "O", result.get(6) ); + Assert.assertEquals( "O", result.get(7) ); + } +} diff --git a/src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java b/src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java new file mode 100644 index 0000000..bdc5053 --- /dev/null +++ b/src/test/java/edu/hit/ir/ltp4j/test/ParserTest.java @@ -0,0 +1,50 @@ +package edu.hit.ir.ltp4j.test; + +import edu.hit.ir.ltp4j.Parser; +import java.util.List; +import java.util.ArrayList; +import org.junit.Assert; +import org.junit.Test; + +public class ParserTest { + @Test public final void testParserCreate() + throws Exception + { + Parser app = new Parser(); + Assert.assertEquals( 1, app.create("ltp_data/parser.model") ); + } + + @Test public final void testParserParse() + throws Exception + { + Parser app = new Parser(); + app.create("ltp_data/parser.model"); + + List words = new ArrayList(); + List tags = new ArrayList(); + + words.add("中国"); tags.add("ns"); + words.add("进出口"); tags.add("n"); + words.add("银行"); tags.add("n"); + words.add("与"); tags.add("p"); + words.add("中国"); tags.add("ns"); + words.add("银行"); tags.add("n"); + words.add("加强"); tags.add("v"); + words.add("合作"); tags.add("v"); + + List heads = new ArrayList(); + List deprels = new ArrayList(); + + app.parse(words, tags, heads, deprels); + + Assert.assertEquals( 8, 
heads.size() ); Assert.assertEquals( 8, deprels.size() ); + Assert.assertTrue( 3 == heads.get(0) ); Assert.assertEquals( "ATT", deprels.get(0) ); + Assert.assertTrue( 3 == heads.get(1) ); Assert.assertEquals( "ATT", deprels.get(1) ); + Assert.assertTrue( 7 == heads.get(2) ); Assert.assertEquals( "SBV", deprels.get(2) ); + Assert.assertTrue( 3 == heads.get(3) ); Assert.assertEquals( "ADV", deprels.get(3) ); + Assert.assertTrue( 6 == heads.get(4) ); Assert.assertEquals( "ATT", deprels.get(4) ); + Assert.assertTrue( 4 == heads.get(5) ); Assert.assertEquals( "POB", deprels.get(5) ); + Assert.assertTrue( 0 == heads.get(6) ); Assert.assertEquals( "HED", deprels.get(6) ); + Assert.assertTrue( 7 == heads.get(7) ); Assert.assertEquals( "VOB", deprels.get(7) ); + } +} diff --git a/src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java b/src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java new file mode 100644 index 0000000..deea7a8 --- /dev/null +++ b/src/test/java/edu/hit/ir/ltp4j/test/PostaggerTest.java @@ -0,0 +1,47 @@ +package edu.hit.ir.ltp4j.test; + +import edu.hit.ir.ltp4j.Postagger; +import java.util.List; +import java.util.ArrayList; +import org.junit.Assert; +import org.junit.Test; + +public class PostaggerTest { + @Test public final void testPostaggerCreate() + throws Exception + { + Postagger app = new Postagger(); + Assert.assertEquals( 1, app.create("ltp_data/pos.model") ); + } + + @Test public final void testPostaggerPostag() + throws Exception + { + Postagger app = new Postagger(); + app.create("ltp_data/pos.model"); + + List words = new ArrayList(); + + words.add("中国"); + words.add("进出口"); + words.add("银行"); + words.add("与"); + words.add("中国"); + words.add("银行"); + words.add("加强"); + words.add("合作"); + + List result = new ArrayList(); + + app.postag(words, result); + Assert.assertEquals( 8, result.size() ); + Assert.assertEquals( "ns", result.get(0) ); + Assert.assertEquals( "n", result.get(1) ); + Assert.assertEquals( "n", result.get(2) ); + 
Assert.assertEquals( "p", result.get(3) ); + Assert.assertEquals( "ns", result.get(4) ); + Assert.assertEquals( "n", result.get(5) ); + Assert.assertEquals( "v", result.get(6) ); + Assert.assertEquals( "v", result.get(7) ); + } +} diff --git a/src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java b/src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java new file mode 100644 index 0000000..b02fd72 --- /dev/null +++ b/src/test/java/edu/hit/ir/ltp4j/test/SRLTest.java @@ -0,0 +1,14 @@ +package edu.hit.ir.ltp4j.test; + +import edu.hit.ir.ltp4j.SRL; +import org.junit.Assert; +import org.junit.Test; + +public class SRLTest { + @Test public final void testSRLCreate() + throws Exception + { + SRL app = new SRL(); + Assert.assertEquals( 1, app.create("ltp_data/pisrl.model") ); + } +} diff --git a/src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java b/src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java new file mode 100644 index 0000000..0b88325 --- /dev/null +++ b/src/test/java/edu/hit/ir/ltp4j/test/SegmentorTest.java @@ -0,0 +1,36 @@ +package edu.hit.ir.ltp4j.test; + +import edu.hit.ir.ltp4j.Segmentor; +import java.util.List; +import java.util.ArrayList; +import org.junit.Assert; +import org.junit.Test; + +public class SegmentorTest { + @Test public final void testSegmentorCreate() + throws Exception + { + Segmentor app = new Segmentor(); + Assert.assertEquals( 1, app.create("ltp_data/cws.model") ); + } + + @Test public final void testSegmentorSegment() + throws Exception + { + Segmentor app = new Segmentor(); + app.create("ltp_data/cws.model"); + + List result = new ArrayList(); + app.segment("中国进出口银行与中国银行加强合作", result); + + Assert.assertEquals( 8, result.size() ); + Assert.assertEquals( "中国", result.get(0) ); + Assert.assertEquals( "进出口", result.get(1) ); + Assert.assertEquals( "银行", result.get(2) ); + Assert.assertEquals( "与", result.get(3) ); + Assert.assertEquals( "中国", result.get(4) ); + Assert.assertEquals( "银行", result.get(5) ); + Assert.assertEquals( "加强", result.get(6) 
); + Assert.assertEquals( "合作", result.get(7) ); + } +}