Skip to content

Commit

Permalink
Improved getArXivId implementation
Browse files (browse the repository at this point in the history)
  • Loading branch information
ar-rana committed Jan 1, 2025
1 parent 06b6bb0 commit 457bb3f
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -370,8 +370,6 @@ Optional<BibEntry> getEntryFromPDFContent(String firstpageContents, String lineS
String publisher = null;

EntryType type = StandardEntryType.InProceedings;
// sometimes ArXiv ID is read before all parameters
getArXivId(null);
if (curString.length() > 4) {
// special case: possibly conference as first line on the page
extractYear();
Expand All @@ -392,6 +390,8 @@ Optional<BibEntry> getEntryFromPDFContent(String firstpageContents, String lineS
}
}
}
// sometimes ArXiv ID is read before title
getArXivId(null);
// start: title
fillCurStringWithNonEmptyLines();
title = streamlineTitle(curString);
Expand Down Expand Up @@ -609,11 +609,15 @@ private String getDoi(String doi) {

private String getArXivId(String arXivId) {
if (arXivId == null) {
arXivId = ArXivIdentifier.parse(curString).map(ArXivIdentifier::asString).orElse(null);
String arXiv = curString.split(" ")[0];
arXivId = ArXivIdentifier.parse(arXiv).map(ArXivIdentifier::asString).orElse(null);
if (arXivId != null) {
if (curString.length() > arXivId.length() + 7) {
// The arxiv string also contains the year
curString = curString.substring(arXivId.length() + 7);
extractYear();
curString = "";
proceedToNextNonEmptyLine();
}
return arXivId;
}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,7 +38,7 @@ public class ArXivIdentifier extends EprintIdentifier {
}

public static Optional<ArXivIdentifier> parse(String value) {
String identifier = value.split(" ")[0];
String identifier = value.replace(" ", "");
Pattern identifierPattern = Pattern.compile("(" + ARXIV_PREFIX + ")?\\s?:?\\s?(?<id>\\d{4}\\.\\d{4,5})(v(?<version>\\d+))?\\s?(\\[(?<classification>\\S+)\\])?");
Matcher identifierMatcher = identifierPattern.matcher(identifier);
if (identifierMatcher.matches()) {
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -133,7 +133,7 @@ British Journal of Nutrition (2008), 99, 1–11 doi: 10.1017/S0007114507795296
void extractArXivFromPage1() {
BibEntry entry = new BibEntry(StandardEntryType.TechReport)
.withField(StandardField.AUTHOR, "Filippo Riccaa and Alessandro Marchettob and Andrea Stoccoc")
.withField(StandardField.TITLE, "[cs.SE] 12 Aug 2024 A Multi-Year Grey Literature Review on AI-assisted Test Automation")
.withField(StandardField.TITLE, "A Multi-Year Grey Literature Review on AI-assisted Test Automation")
.withField(StandardField.YEAR, "2024")
.withField(StandardField.EPRINT, "2408.06224v1")
.withField((StandardField.KEYWORDS), "Test Automation Artificial Intelligence AI-assisted Test Automation Grey Literature Automated Test Generation Self-Healing Test Scripts");
Expand Down

0 comments on commit 457bb3f

Please sign in to comment.