Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multimodality table to documentation #639

Merged
merged 10 commits into from
Jul 30, 2024
170 changes: 110 additions & 60 deletions docs/generate_model_js.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json

import os
import glob

def get_key_val(part):
key_index_start = part.index('[')
Expand All @@ -13,48 +14,8 @@ def get_key_val(part):
return key, val


# Read the content from README.md
with open('../README.md', 'r') as file:
content = file.read()

# Extract the relevant information from the content
models = []
lines = content.split('\n')
lines = lines[lines.index('## Models') + 4: lines.index('## Resources') - 2]

headers = []
headers = lines[0].split('|')[1:-1]
headers = [header.strip() for header in headers]

for line in lines[2:]:
parts = line.split('|')[1:-1]
parts = [part.strip() for part in parts]
model = dict(zip(headers, parts))
models.append(model)

year = None

for model in models:
# handle empty years
if model["Year"] == "":
model["Year"] = year
else:
year = model["Year"]

# handle model, docs and paper part
name_paper_str = model["Model and Paper"]

for i, part in enumerate(name_paper_str.split(', ')):
key, val = get_key_val(part)

if i == 0:
model["Name"] = key
model["Link"] = val
else:
model[key] = val

def add_pytorch_tensorflow(model):
# handle environment part

env_part = model["Environment"].split(', ')[0]

search_dict = {
Expand All @@ -74,28 +35,117 @@ def get_key_val(part):
else:
for header, _ in search_dict.items():
model[header] = False


def add_modalities_for_model(model):
    """Detect which modalities a model supports and set boolean columns in place.

    Scans every ``*.py`` file under ``../<model["Link"]>`` and marks a
    modality header True when its keyword appears anywhere in the file.
    Also drops the raw README columns ("Model and Paper", "Environment")
    that should not appear in the generated JSON table.

    Parameters
    ----------
    model : dict
        A row parsed from the README models table; mutated in place.
        Must contain the keys "Link", "Model and Paper" and "Environment".
    """
    # Docs-table header -> keyword searched for in the model's source files.
    modalities_keywords = {
        "User Text": "user_text",
        "User Image": "user_image",
        "User Graph": "user_graph",
        "Item Text": "item_text",
        "Item Image": "item_image",
        "Item Graph": "item_graph",
        "Sentiment": "sentiment",
        "Review Text": "review_text",
    }

    # Remove raw README columns not required in the output.
    model.pop("Model and Paper")
    model.pop("Environment")

    # NOTE(review): recursive=True has no effect without a '**' component in
    # the pattern, so only the top-level model directory is scanned — confirm
    # whether subdirectories were meant to be included.
    for filename in glob.glob(f'../{model["Link"]}/*.py', recursive=True):
        with open(filename, 'r') as file:
            file_data = file.read()

        # Mark each modality whose keyword occurs anywhere in the file.
        for header, modality_keyword in modalities_keywords.items():
            if modality_keyword in file_data:
                model[header] = True

        # A generic "user_feature" implies all three user modalities.
        if "user_feature" in file_data:
            model["User Text"] = True
            model["User Image"] = True
            model["User Graph"] = True

        # Likewise, a generic "item_feature" implies all item modalities.
        if "item_feature" in file_data:
            model["Item Text"] = True
            model["Item Image"] = True
            model["Item Graph"] = True

    # Any modality never detected defaults to False.
    for header in modalities_keywords:
        if header not in model:
            model[header] = False


if __name__ == "__main__":
    # Read the models table out of the repository README.
    with open('../README.md', 'r') as file:
        content = file.read()

    # Keep only the markdown table rows between "## Models" and "## Resources".
    lines = content.split('\n')
    lines = lines[lines.index('## Models') + 4: lines.index('## Resources') - 2]

    # The first retained row holds the column headers of the markdown table.
    headers = [header.strip() for header in lines[0].split('|')[1:-1]]

    models = []
    for line in lines[2:]:  # skip the header row and the |---| separator row
        parts = [part.strip() for part in line.split('|')[1:-1]]
        models.append(dict(zip(headers, parts)))

    year = None
    for model in models:
        # Rows with an empty Year cell inherit the year of the previous row.
        if model["Year"] == "":
            model["Year"] = year
        else:
            year = model["Year"]

        # Split "Model and Paper" into Name/Link plus any extra key/value pairs.
        for i, part in enumerate(model["Model and Paper"].split(', ')):
            key, val = get_key_val(part)
            if i == 0:
                model["Name"] = key
                model["Link"] = val
            else:
                model[key] = val

        # Check for PyTorch and TensorFlow in each requirements file.
        add_pytorch_tensorflow(model)

        # Check for modality keywords in the model's source files; this also
        # removes the raw "Model and Paper"/"Environment" columns, so they
        # must not be popped again here.
        add_modalities_for_model(model)

        # Collect fully-qualified package names from the model's __init__.py.
        with open(f'../{model["Link"]}/__init__.py', 'r') as file:
            init_data = file.read()

        package_names = []
        for row in init_data.split('\n'):
            if "import" in row:
                package_name = row[row.index("import") + len("import "):]
                package_names.append(f"cornac.models.{package_name}")
        model["packages"] = package_names

    # Serialize once and write the JS data file consumed by the docs site.
    json_str = json.dumps(models, indent=4)
    with open('source/_static/models/data.js', 'w') as file:
        file.write(f"var data = {json_str};")
Loading
Loading