Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add xml #140

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions hfx/xml/config.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
locus=HLA-DRB1
resolution=2-Field
method=Haplotype Frequency Estimation
parameter=SomeParameter
value=SomeValue
locus=HLA-A
resolution=g
locus=HLA-C
resolution=g
method=Haplotype Frequency Estimation via EM
parameter=Cut-off
value=0.000000001
parameter=SomeOtherParameter
value=BetterValue
geoCode=US
ethnicity=Caucasian
ethnicity=SampleData
species=Human
description=Those guys
inputSize=1000
inputSize=100
kind=imgt
version=3.40.0
71 changes: 59 additions & 12 deletions hfx/xml/csv2xml.pl
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

# 1) Read optional config file for meta data, or use defaults
my %config = (
locus => 'HLA-DRB1',
resolution => '2-Field',
method => 'Unknown',
parameter => 'Unknown',
value => 'Unknown',
Expand All @@ -35,15 +33,51 @@
);

# If config file is provided, override default metadata with its values
# Repeatable config entries, collected in file order: locus/resolution pairs
# and parameter/value pairs.
my @output_resolutions;
my @parameters;
if ($config_file) {
    open my $cfg_fh, '<', $config_file or die "Cannot open config file: $!";

    # Halves of a pair that have been read but not yet matched up.
    my ($current_locus, $current_resolution, $current_parameter, $current_value);

    LINE:
    while (my $line = <$cfg_fh>) {
        # Drop the line ending (LF or CRLF) and any surrounding whitespace.
        $line =~ s/\A\s+//;
        $line =~ s/\s+\z//;

        # Blank lines and lines without '=' hold no key/value entry.
        next LINE if index($line, '=') < 0;

        # LIMIT 2: only the first '=' separates key from value, so a value
        # may itself contain '='.
        my ($key, $value) = split /\s*=\s*/, $line, 2;
        next LINE if $key eq '';

        # Keep every key in %config as well (last occurrence wins), so code
        # that reads $config{...} directly still sees it.
        $config{$key} = $value;

        # An empty value cannot form half of a pair.
        my $item = length($value) ? $value : undef;

        if ($key eq 'locus') {
            # A locus still pending here never got a resolution. Resolution
            # is optional, so record the locus alone rather than letting the
            # next locus overwrite it.
            push @output_resolutions, { locus => $current_locus }
                if defined $current_locus;
            $current_locus = $item;
        } elsif ($key eq 'resolution') {
            $current_resolution = $item;
        } elsif ($key eq 'parameter') {
            $current_parameter = $item;
        } elsif ($key eq 'value') {
            $current_value = $item;
        }

        # Collect locus-resolution pairs. Tested with defined() rather than
        # truthiness so that a literal "0" is not mistaken for "missing".
        if (defined $current_locus && defined $current_resolution) {
            push @output_resolutions,
                { locus => $current_locus, resolution => $current_resolution };
            $current_locus      = undef;
            $current_resolution = undef;
        }

        # Collect parameter-value pairs (same defined() reasoning: "value=0"
        # is a legitimate setting).
        if (defined $current_parameter && defined $current_value) {
            push @parameters,
                { parameter => $current_parameter, value => $current_value };
            $current_parameter = undef;
            $current_value     = undef;
        }
    }

    # A trailing locus with no resolution after it is still a valid entry.
    push @output_resolutions, { locus => $current_locus }
        if defined $current_locus;

    close $cfg_fh;
}
# If no locus/resolution pairs are found, add default pair
if (!@output_resolutions) {
    push @output_resolutions, { locus => 'HLA-DRB1', resolution => '2-Field' };
}





# 2) Read and parse CSV file if provided, or use an empty dataset
my @haplotype_data;
Expand Down Expand Up @@ -74,20 +108,31 @@
# Add metadata from config (or defaults if config file not provided)
$writer->startTag('metaData');

# OutputResolution: one <resolutionPair> per collected locus/resolution entry.
# Only the pairs are written; the container takes no bare <locus> or
# <resolution> children of its own.
$writer->startTag('outputResolution');
foreach my $res (@output_resolutions) {
    $writer->startTag('resolutionPair');
    $writer->dataElement('locus', $res->{locus});

    # resolution is optional for a locus; skip the element when it is absent
    if (defined $res->{resolution} && length $res->{resolution}) {
        $writer->dataElement('resolution', $res->{resolution});
    }
    $writer->endTag('resolutionPair');
}
$writer->endTag('outputResolution');


# hfeMethod: the method name followed by every parameter/value pair that was
# collected from the config file.
$writer->startTag('hfeMethod');
$writer->dataElement('method', $config{'method'});

# <parameters> holds only <parameterPair> children; with no collected pairs it
# is written as an empty container.
$writer->startTag('parameters');
foreach my $param (@parameters) {
    $writer->startTag('parameterPair');
    $writer->dataElement('parameter', $param->{parameter});
    $writer->dataElement('value', $param->{value});
    $writer->endTag('parameterPair');
}
$writer->endTag('parameters');
$writer->endTag('hfeMethod');

Expand All @@ -111,20 +156,22 @@
$writer->endTag('metaData');

# Add haplotype frequency data: a single <frequencyData> container is always
# written, holding one <frequencyPair> per entry in @haplotype_data (so it is
# empty when there is no data). Start and end tags are kept strictly nested:
# the container is opened once before the loop and closed once after it.
$writer->startTag('frequencyData');
foreach my $data (@haplotype_data) {
    $writer->startTag('frequencyPair');
    $writer->dataElement('haplotype', $data->{haplotype});
    $writer->dataElement('frequency', $data->{frequency});
    $writer->endTag('frequencyPair');
}
$writer->endTag('frequencyData');

$writer->endTag('hfxHaplotypeList');
$writer->end();

$output->close if $output_xml_file;

# exit;
# 5) Validate the generated XML against the XSD
if ($output_xml_file) {
my $schema = XML::LibXML::Schema->new(location => $xsd_file);
Expand Down
22 changes: 20 additions & 2 deletions hfx/xml/hfx_schema.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
<xs:element name="outputResolution" minOccurs="1" maxOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:element name="resolutionPair" minOccurs="1" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="locus" type="xs:string" />
<xs:element name="resolution" minOccurs="0">
<xs:simpleType>
Expand All @@ -27,18 +30,27 @@
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="hfeMethod" minOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:element name="method" type="xs:string" />
<xs:element name="parameters" minOccurs="1" maxOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:sequence >
<xs:element name="parameterPair" minOccurs="0" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="parameter" type="xs:string" />
<xs:element name="value" type="xs:string" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
Expand Down Expand Up @@ -79,13 +91,19 @@
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="frequencyData" minOccurs="0" maxOccurs="unbounded">
<xs:element name="frequencyData" minOccurs="0" maxOccurs="1">
<xs:complexType>
<xs:sequence>
<xs:element name="frequencyPair" minOccurs="0" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="haplotype" type="xs:string" />
<xs:element name="frequency" type="xs:decimal" />
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
Expand Down