Skip to content

Commit

Permalink
had to remove a redundant block of code during merge
Browse files Browse the repository at this point in the history
  • Loading branch information
mdorf committed Sep 6, 2023
2 parents 1fa5e42 + 29153b6 commit 629aa39
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 51 deletions.
4 changes: 2 additions & 2 deletions lib/ontologies_linked_data/diff/bubastis_diff.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ class BubastisDiffCommand
# Loading one file locally and one from the web and outputting results to plain text:
# java -jar bubastis_1_2.jar -ontology1 "H://disease_ontology_version_1.owl" -ontology2 "http://www.disease.org/diseaseontology_latest.owl" -output "C://my_diff.txt"

# Stores everything needed to later run the Bubastis jar on two ontology files.
#
# @param input_fileOld the older ontology file to compare
# @param input_fileNew the newer ontology file to compare
# @param output_repo   location for the diff output, supplied by the caller
#                      (presumably a directory - confirm against the diff method)
def initialize(input_fileOld, input_fileNew, output_repo)
  @bubastis_jar_path = LinkedData.bindir + "/bubastis.jar"
  @input_fileOld = input_fileOld
  @input_fileNew = input_fileNew
  # The output location is no longer derived from the new file's path;
  # the caller decides where the diff goes.
  @output_repo = output_repo
  @file_diff_path = nil
  @java_heap_size = LinkedData.settings.java_max_heap_size
end
Expand Down
92 changes: 68 additions & 24 deletions lib/ontologies_linked_data/models/ontology_submission.rb
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def sanity_check
return true
end

zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath)
zip = zipped?
files = LinkedData::Utils::FileHelpers.files_from_zip(self.uploadFilePath) if zip

if not zip and self.masterFileName.nil?
Expand Down Expand Up @@ -261,8 +261,12 @@ def data_folder
self.submissionId.to_s)
end

# Tells whether a file is a compressed upload, i.e. either a zip or a gzip archive.
#
# @param full_file_path file to inspect; defaults to this submission's uploadFilePath
# @return [Boolean] true when FileHelpers.zip? or FileHelpers.gzip? recognises the file
def zipped?(full_file_path = uploadFilePath)
  helpers = LinkedData::Utils::FileHelpers
  # zip is checked first; gzip is only consulted when the zip check is negative
  helpers.zip?(full_file_path) || helpers.gzip?(full_file_path)
end

# Path of the "unzipped" folder inside this submission's data folder
# (unzip_submission uses it as the extraction destination).
#
# @return [String] data_folder joined with "unzipped"
def zip_folder
  File.join(data_folder, 'unzipped')
end

def csv_path
Expand All @@ -286,17 +290,16 @@ def triples_file_path
self.bring(:masterFileName) if self.bring?(:masterFileName)
triples_file_name = File.basename(self.uploadFilePath.to_s)
full_file_path = File.join(File.expand_path(self.data_folder.to_s), triples_file_name)
zip = LinkedData::Utils::FileHelpers.zip?(full_file_path)
zip = zipped? full_file_path
triples_file_name = File.basename(self.masterFileName.to_s) if zip && self.masterFileName
file_name = File.join(File.expand_path(self.data_folder.to_s), triples_file_name)
File.expand_path(file_name)
end

def unzip_submission(logger)
zip = LinkedData::Utils::FileHelpers.zip?(self.uploadFilePath)
zip_dst = nil

if zip
if zipped?
zip_dst = self.zip_folder

if Dir.exist? zip_dst
Expand All @@ -311,10 +314,12 @@ def unzip_submission(logger)
self.save
end

logger.info("Files extracted from zip #{extracted}")
logger.flush
if logger
logger.info("Files extracted from zip #{extracted}")
logger.flush
end
end
return zip_dst
zip_dst
end

def delete_old_submission_files
Expand All @@ -328,17 +333,18 @@ def delete_old_submission_files
# accepts another submission in 'older' (it should be an 'older' ontology version)
def diff(logger, older)
begin
self.bring_remaining
self.bring(:diffFilePath)
self.bring(:uploadFilePath)
older.bring(:uploadFilePath)
bring_remaining
bring :diffFilePath if bring? :diffFilePath
older.bring :uploadFilePath if older.bring? :uploadFilePath

LinkedData::Diff.logger = logger
bubastis = LinkedData::Diff::BubastisDiffCommand.new(
File.expand_path(older.uploadFilePath),
File.expand_path(self.uploadFilePath)
File.expand_path(older.master_file_path),
File.expand_path(self.master_file_path),
data_folder
)
self.diffFilePath = bubastis.diff
self.save
save
logger.info("Bubastis diff generated successfully for #{self.id}")
logger.flush
rescue Exception => e
Expand Down Expand Up @@ -436,7 +442,7 @@ def generate_umls_metrics_file(tr_file_path=nil)
self.generate_metrics_file(class_count, indiv_count, prop_count)
end

def generate_rdf(logger, file_path, reasoning=true)
def generate_rdf(logger, reasoning: true)
mime_type = nil

if self.hasOntologyLanguage.umls?
Expand All @@ -458,10 +464,7 @@ def generate_rdf(logger, file_path, reasoning=true)
logger.info("error deleting owlapi.rdf")
end
end
owlapi = LinkedData::Parser::OWLAPICommand.new(
File.expand_path(file_path),
File.expand_path(self.data_folder.to_s),
master_file: self.masterFileName)
owlapi = owlapi_parser(logger: nil)

if !reasoning
owlapi.disable_reasoner
Expand Down Expand Up @@ -998,7 +1001,6 @@ def process_submission(logger, options={})
self.save

# Parse RDF
file_path = nil
begin
if not self.valid?
error = "Submission is not valid, it cannot be processed. Check errors."
Expand All @@ -1010,9 +1012,7 @@ def process_submission(logger, options={})
end
status = LinkedData::Models::SubmissionStatus.find("RDF").first
remove_submission_status(status) #remove RDF status before starting
zip_dst = unzip_submission(logger)
file_path = zip_dst ? zip_dst.to_s : self.uploadFilePath.to_s
generate_rdf(logger, file_path, reasoning=reasoning)
generate_rdf(logger, reasoning: reasoning)
add_submission_status(status)
self.save
rescue Exception => e
Expand Down Expand Up @@ -1587,8 +1587,52 @@ def delete_classes_graph
Goo.sparql_data_client.delete_graph(self.id)
end


# Absolute path of the ontology file itself: the master file inside the
# unzip folder for compressed uploads, otherwise the uploaded file.
#
# NOTE(review): for a compressed upload this joins masterFileName as-is, so a
# nil masterFileName would make File.join raise - confirm callers guarantee it.
#
# @return [String] expanded (absolute) file path
def master_file_path
  location = zipped? ? File.join(zip_folder, masterFileName) : uploadFilePath
  File.expand_path(location)
end

# Checks whether the submission can be parsed by the OWLAPI parser with the
# reasoner disabled.
#
# Only failures of the parse step itself are turned into `false`; errors raised
# while building the parser (which also unzips the upload) still propagate.
#
# @param logger logger handed to the parser and used to report why parsing
#               failed; may be nil to stay silent
# @return [Boolean] true when parsing succeeds, false when it raises a StandardError
def parsable?(logger: Logger.new($stdout))
  owlapi = owlapi_parser(logger: logger)
  owlapi.disable_reasoner
  begin
    owlapi.parse
    true
  rescue StandardError => e
    # Record the reason instead of discarding it, so a "not parsable" verdict
    # can be diagnosed afterwards.
    logger&.error("Submission is not parsable: #{e.class}: #{e.message}")
    false
  end
end


private


# Absolute path given to the OWLAPI parser as its input: the unzip folder for
# compressed uploads, otherwise the uploaded file itself.
#
# @return [String] expanded (absolute) path
def owlapi_parser_input
  source = zipped? ? zip_folder : uploadFilePath
  File.expand_path(source)
end


# Builds the OWLAPI command for this submission.
#
# The upload is unzipped first (via unzip_submission) so that the parser input
# exists on disk before the command is constructed.
#
# @param logger logger passed both to unzip_submission and to the command
# @return [LinkedData::Parser::OWLAPICommand]
def owlapi_parser(logger: Logger.new($stdout))
  unzip_submission(logger)
  parser_input = owlapi_parser_input
  output_folder = File.expand_path(self.data_folder.to_s)
  LinkedData::Parser::OWLAPICommand.new(
    parser_input,
    output_folder,
    master_file: self.masterFileName,
    logger: logger
  )
end


def delete_and_append(triples_file_path, logger, mime_type = nil)
Goo.sparql_data_client.delete_graph(self.id)
Goo.sparql_data_client.put_triples(self.id, triples_file_path, mime_type)
Expand Down
62 changes: 37 additions & 25 deletions lib/ontologies_linked_data/utils/file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,31 @@ module FileHelpers

# Tells whether the file at the given path is a zip archive, based on the MIME
# type reported by the `file` command.
#
# @param file_path path to inspect (converted with to_s)
# @return [Boolean] true when the reported MIME type is application/zip
# @raise [ArgumentError] when the path does not exist
def self.zip?(file_path)
  path = file_path.to_s
  raise ArgumentError, "File path #{path} not found" unless File.exist?(path)

  # `file --mime -b` prints "<mime type>; charset=..."; only the part before
  # the first ';' is compared.
  mime_type, = `file --mime -b #{Shellwords.escape(path)}`.split(';')
  mime_type == 'application/zip'
end

# Tells whether the file at the given path is a gzip archive, based on the MIME
# type reported by the `file` command.
#
# Both the registered media type (application/gzip) and the legacy
# application/x-gzip are accepted.
# NOTE(review): which of the two `file` reports is assumed to depend on the
# installed version - confirm on the deployment hosts.
#
# @param file_path path to inspect (converted with to_s)
# @return [Boolean] true when the reported MIME type is a gzip type
# @raise [ArgumentError] when the path does not exist
def self.gzip?(file_path)
  file_path = file_path.to_s
  raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path

  # `file --mime -b` prints "<mime type>; charset=..."; only the part before
  # the first ';' is compared.
  file_type = `file --mime -b #{Shellwords.escape(file_path)}`
  %w[application/gzip application/x-gzip].include?(file_type.split(';')[0])
end

def self.files_from_zip(file_path)
file_path = file_path.to_s
unless File.exist? file_path
raise ArgumentError, "File path #{file_path} not found"
end

files = []
Zip::File.open(file_path) do |zipfile|
zipfile.each do |file|
if not file.directory?
if not file.name.split("/")[-1].start_with? "." #a hidden file in __MACOSX or .DS_Store
if not file.name.split('/')[-1].start_with? '.' #a hidden file in __MACOSX or .DS_Store
files << file.name
end
end
Expand All @@ -37,26 +45,30 @@ def self.files_from_zip(file_path)
# Extracts a gzip or zip archive into an existing destination folder.
#
# gzip: the single payload is written to dst_folder under the original file
# name stored in the gzip header, and a GzipFile wrapper is returned for it.
# zip:  every entry is extracted under dst_folder, creating intermediate
# sub-folders as needed.
#
# NOTE(review): entry names (zip) and the stored original name (gzip) are joined
# onto dst_folder without sanitising; archives come from uploads, so consider
# rejecting names that escape dst_folder. A gzip without a stored original name
# is also not handled here.
#
# @param file_path  archive to extract (converted with to_s)
# @param dst_folder existing folder to extract into (converted with to_s)
# @return [Array] one element per extracted entry (GzipFile for gzip input,
#                 the result of Zip entry #extract for zip input)
# @raise [ArgumentError] when the archive or the destination folder is missing
def self.unzip(file_path, dst_folder)
  file_path = file_path.to_s
  dst_folder = dst_folder.to_s
  raise ArgumentError, "File path #{file_path} not found" unless File.exist? file_path
  raise ArgumentError, "Folder path #{dst_folder} not found" unless Dir.exist? dst_folder

  extracted_files = []
  if gzip?(file_path)
    Zlib::GzipReader.open(file_path) do |gz|
      # Stream the payload in binary mode: `puts(gz.read)` appended a newline
      # to content not ending in one and held the whole file in memory.
      File.open([dst_folder, gz.orig_name].join('/'), 'wb') { |out| IO.copy_stream(gz, out) }
      extracted_files << GzipFile.new(gz)
    end
  else
    Zip::File.open(file_path) do |zipfile|
      zipfile.each do |file|
        name_parts = file.name.split('/')
        if name_parts.length > 1
          # Entry lives in a sub-folder: make sure it exists before extracting.
          sub_folder = File.join(dst_folder, name_parts[0..-2].join('/'))
          FileUtils.mkdir_p sub_folder unless Dir.exist?(sub_folder)
        end
        extracted_files << file.extract(File.join(dst_folder, file.name))
      end
    end
  end
  extracted_files
end

def self.automaster?(path, format)
Expand All @@ -65,13 +77,13 @@ def self.automaster?(path, format)

def self.automaster(path, format)
files = self.files_from_zip(path)
basename = File.basename(path, ".zip")
basename = File.basename(path, '.zip')
basename = File.basename(basename, format)
files.select {|f| File.basename(f, format).downcase.eql?(basename.downcase)}.first
end

def self.repeated_names_in_file_list(file_list)
return file_list.group_by {|x| x.split("/")[-1]}.select { |k,v| v.length > 1}
return file_list.group_by {|x| x.split('/')[-1]}.select { |k,v| v.length > 1}
end

def self.exists_and_file(path)
Expand All @@ -95,7 +107,7 @@ def self.download_file(uri, limit = 10)
http_session.use_ssl = (uri.scheme == 'https')
http_session.start do |http|
http.read_timeout = 1800
http.request_get(uri.request_uri, {"Accept-Encoding" => "gzip"}) do |res|
http.request_get(uri.request_uri, {'Accept-Encoding' => 'gzip'}) do |res|
if res.kind_of?(Net::HTTPRedirection)
new_loc = res['location']
if new_loc.match(/^(http:\/\/|https:\/\/)/)
Expand All @@ -108,7 +120,7 @@ def self.download_file(uri, limit = 10)

raise Net::HTTPBadResponse.new("#{uri.request_uri}: #{res.code}") if res.code.to_i >= 400

file_size = res.read_header["content-length"].to_i
file_size = res.read_header['content-length'].to_i
begin
content_disposition = res.read_header['content-disposition']
filenames = content_disposition.match(/filename=\"(.*)\"/) || content_disposition.match(/filename=(.*)/)
Expand All @@ -120,7 +132,7 @@ def self.download_file(uri, limit = 10)
file.write(res.body)

if res.header['Content-Encoding'].eql?('gzip')
uncompressed_file = Tempfile.new("uncompressed-ont-rest-file")
uncompressed_file = Tempfile.new('uncompressed-ont-rest-file')
file.rewind
sio = StringIO.new(file.read)
gz = Zlib::GzipReader.new(sio)
Expand Down

0 comments on commit 629aa39

Please sign in to comment.