From 2bee6f379a89b8c92585eba97e4aaa677e128e78 Mon Sep 17 00:00:00 2001
From: Markus Kaiser <markus.kaiser@siemens.com>
Date: Tue, 17 May 2022 13:59:27 +0100
Subject: [PATCH 1/2] Add cicd-mode to create_volume.rb

---
 ruby/create_volume.rb |   8 +++-
 ruby/mlresearch.rb    | 107 +++++++++++++++++++++---------------------
 2 files changed, 60 insertions(+), 55 deletions(-)

diff --git a/ruby/create_volume.rb b/ruby/create_volume.rb
index 74063fd..103ae3f 100755
--- a/ruby/create_volume.rb
+++ b/ruby/create_volume.rb
@@ -22,6 +22,7 @@
 reponame=nil
 supp_file = nil
 supp_name = nil
+cicd_mode = false
 
 OptionParser.new do |parser|
   parser.banner = "Usage: create_volume.rb -v VOLUME -b BIBFILE [optional]"
@@ -58,6 +59,10 @@
             "A csv file containing information about supplementary label") do |label|
     supp_name=label
   end
+  parser.on("-c", "--[no-]cicd-mode",
+            "Validate bibfile only. Will not produce a valid website, but does not require pdfs.") do |cicd|
+    cicd_mode=cicd
+  end
 
 end.parse!
 
@@ -83,9 +88,8 @@
 # Write the papers
 directory_name = "_posts"
 Dir.mkdir(directory_name) unless File.exists?(directory_name)
-MLResearch.extractpapers(bib_file, volume_no, volume_info, software_file, video_file, supp_file, supp_name)  
+MLResearch.extractpapers(bib_file, volume_no, volume_info, software_file, video_file, supp_file, supp_name, cicd_mode)
 out = File.open('index.html', 'w')
 out.puts "---"
 out.puts "layout: home"
 out.puts "---"
-
diff --git a/ruby/mlresearch.rb b/ruby/mlresearch.rb
index 733be59..3b0a487 100644
--- a/ruby/mlresearch.rb
+++ b/ruby/mlresearch.rb
@@ -34,7 +34,7 @@ def self.basedir
     # Get base of directory containing `papersite` repo by going three
     # steps up from where this file is located
     File.dirname(__FILE__).split('/')[0..-3].join('/')
-  end 
+  end
   def self.procdir
     self.basedir + '/'
   end
@@ -75,7 +75,7 @@ def self.detex(string_in)
     LaTeX::Decode::Punctuation.decode!(string)
     LaTeX::Decode::Symbols.decode!(string)
     LaTeX::Decode::Greek.decode!(string)
-    
+
     LaTeX::Decode::Base.strip_braces(string)
 
     LaTeX.normalize_C(string)
@@ -94,7 +94,7 @@ def self.detex_abstract(string)
     #LaTeX::Decode::Symbols.decode!(string)
     #LaTeX::Decode::Greek.decode!(string)
     # Don't remove brackets as it messes up maths.
-    
+
     LaTeX.normalize_C(string)
     # Need to deal with different encodings. Map to utf-8
   end
@@ -109,21 +109,21 @@ def self.detex_tex_title(string)
     LaTeX::Decode::Diacritics.decode!(string)
     LaTeX::Decode::Punctuation.decode!(string)
     LaTeX::Decode::Symbols.decode!(string)
-    LaTeX::Decode::Greek.decode!(string)    
+    LaTeX::Decode::Greek.decode!(string)
     LaTeX.normalize_C(string)
     # Need to deal with different encodings. Map to utf-8
   end
   #def self.detex_abstract(text)
   #  return PandocRuby.convert(text, {:from => :latex, :to => :markdown}, 'no-wrap')[0..-2]
   #end
-  
+
   def self.bibtohash(obj)
     # Takes an bib file object and returns a cleaned up hash.
     # Params:
     # +obj+:: Object to clean up
     # +bib+:: +BibTeX+ object that contains strings etc
     # +errhandler+:: +Proc+ object that takes a pipe object as first and only param (may be nil)
-    
+
     ha = obj.to_hash(:quotes=>'').stringify_keys!()
     ha['layout'] = ha['bibtex_type'].to_s
     ha.tap { |hs| hs.delete('bibtex_type') }
@@ -132,13 +132,13 @@ def self.bibtohash(obj)
     ha['issn'] = '2640-3498'
     ha['id'] = ha['bibtex_key'].to_s
     ha.tap { |hs| hs.delete('bibtex_key') }
-    
+
     #ha['categories'] = Array.new(1)
     #ha['categories'][0] = ha['key']
-    
+
     ha['month'] = ha['month_numeric'].to_i
     ha.tap { |hs| hs.delete('month_numeric') }
-    
+
     ha.delete_if {|key, value| key[0..2] == "opt" }
 
     if ha.has_key?('abstract')
@@ -173,7 +173,7 @@ def self.bibtohash(obj)
     if ha.has_key?('firstpage')
       ha['order'] = ha['firstpage'].to_i
     end
-    
+
     published = ha['published']
     ha['cycles'] = false
     if ha.has_key?('sections')
@@ -192,14 +192,14 @@ def self.bibtohash(obj)
       end
       ha['sections'] = hasections
     end
-    
+
     if ha.has_key?('editor')
       ha['bibtex_editor'] = ha['editor']
       editor = splitauthors(ha, obj, type=:editor)
       ha.tap { |hs| hs.delete('editor') }
       ha['editor'] = editor
     end
-    
+
     if ha.has_key?('author')
       ha['bibtex_author'] = ha['author']
       author = splitauthors(ha, obj)
@@ -222,7 +222,7 @@ def self.bibtohash(obj)
     if ha.has_key?('end')
       ha['end'] = Date.parse ha['end']
     end
-    
+
     return ha
   end
   def self.yamltohash(obj)
@@ -234,13 +234,13 @@ def self.mindigit(str, num=2)
     end
     return str
   end
-  
+
   def self.filename(date, title)
     puts title
     f = date.to_s + '-' + title.to_s + '.md'
     return f
   end
-  
+
   def self.splitauthors(ha, obj, type=:author)
     puts obj[:author]
     a = Array.new(obj[type].length)       #=> [nil, nil, nil]
@@ -260,7 +260,7 @@ def self.splitauthors(ha, obj, type=:author)
     end
     return a
   end
-  
+
   def self.disambiguate_chars(count)
     div, mod = count.divmod(26)
     if div == 0
@@ -269,9 +269,9 @@ def self.disambiguate_chars(count)
       return disambiguate_chars(div-1) + (mod+97).chr
     end
   end
-  def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, video_file=nil, supp_file=nil, supp_name=nil)
+  def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, video_file=nil, supp_file=nil, supp_name=nil, cicd_mode=false)
     # Extract paper info from bib file and put it into yaml files in _posts
-    
+
     # Extract information about software links from a csv file.
     if software_file.nil?
       software_data = nil
@@ -292,8 +292,8 @@ def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, vide
     else
       supp_data = Hash[*CSV.read(supp_file).flatten]
     end
-    
-    
+
+
     file = File.open(bib_file, "rb")
     contents = file.read
 
@@ -326,7 +326,7 @@ def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, vide
           end
         end
       end
-      
+
       ha['address'] = volume_info['address']
       ha['publisher'] = 'PMLR'
       ha['container-title'] = volume_info['booktitle']
@@ -351,25 +351,27 @@ def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, vide
       #puts ha['author'][0]['family'] + published.year.to_s.slice(-2,-1) + 'a'
       #puts ha['id']
 
-      # True for volumes that didn't necessarily conform to original layout
-      inc_layout = ([27..53] + [55..56] + [63..64]).include?(volume_no.to_i)
-      # Move all pdfs to correct directory with correct filename
-      if inc_layout
-        ha['pdf'] = 'https://proceedings.mlr.press' + '/' + volume_info['volume_dir'] + '/' + ha['id'] + '.pdf'
-      else
-        if File.file?(ha['id'] + '.pdf')
-          Dir.mkdir(filestub) unless File.exists?(filestub)
-          if not File.file?(filestub + '/' + filestub + '.pdf')
-            FileUtils.mv(ha['id'] + '.pdf', filestub + '/' + filestub + '.pdf')
-          end
-        end
-        if File.file?(filestub + '/' + filestub + '.pdf')
-          ha['pdf'] = 'https://proceedings.mlr.press' + '/' + volume_info['volume_dir'] + '/' + filestub + '/' + filestub + '.pdf'
+      if not cicd_mode
+        # True for volumes that didn't necessarily conform to original layout
+        inc_layout = ([27..53] + [55..56] + [63..64]).include?(volume_no.to_i)
+        # Move all pdfs to correct directory with correct filename
+        if inc_layout
+          ha['pdf'] = 'https://proceedings.mlr.press' + '/' + volume_info['volume_dir'] + '/' + ha['id'] + '.pdf'
         else
-          raise "PDF " + filestub + '/' + filestub + '.pdf' + " file not present"
+          if File.file?(ha['id'] + '.pdf')
+            # Use File.exist?: the deprecated File.exists? alias was removed in Ruby 3.2.
+            Dir.mkdir(filestub) unless File.exist?(filestub)
+            # Move the PDF into <filestub>/<filestub>.pdf, but never overwrite one already there.
+            FileUtils.mv(ha['id'] + '.pdf', filestub + '/' + filestub + '.pdf') unless File.file?(filestub + '/' + filestub + '.pdf')
+          end
+          if File.file?(filestub + '/' + filestub + '.pdf')
+            ha['pdf'] = 'https://proceedings.mlr.press' + '/' + volume_info['volume_dir'] + '/' + filestub + '/' + filestub + '.pdf'
+          else
+            raise "PDF " + filestub + '/' + filestub + '.pdf' + " file not present"
+          end
         end
       end
-      
+
       # Add software link if it is available.
       if not ha.has_key?('software') and not software_data.nil? and software_data.has_key?(ha['id'])
           ha['software'] = software_data[ha['id']]
@@ -409,20 +411,20 @@ def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, vide
         end
       end
 
-        
-        
+
+
       # If it's not in the bad layout then update key
       if not inc_layout
         ha['id'] = filestub
       end
-      
+
       ya = ha.to_yaml(:ExplicitTypes => true)
       fname = filename(published, filestub)
       out = File.open('_posts/' + fname, 'w')
       out.puts ya
       out.puts "# Format based on citeproc: http://blog.martinfenner.org/2013/07/30/citeproc-yaml-for-bibliographies/"
       out.puts "---"
-    end  
+    end
   end
 
   def self.extractconfig()
@@ -431,8 +433,8 @@ def self.extractconfig()
     return ha
   end
 
-    
-  
+
+
   def self.bibextractconfig(bibfile, volume_no, volume_type, volume_prefix)
     # Extract information about the volume from the bib file, place in _config.yml
     file = File.open(bibfile, "rb")
@@ -448,7 +450,7 @@ def self.bibextractconfig(bibfile, volume_no, volume_type, volume_prefix)
     booktitle = ha['booktitle']
     ha['description'] = booktitle
     if ha.has_key?('address')
-      ha['description'] += "\n  Held in " + ha['address'] 
+      ha['description'] += "\n  Held in " + ha['address']
     end
     if ha.has_key?('start') and ha.has_key?('end')
       ha['description'] += " on "
@@ -537,7 +539,7 @@ def self.bibextractconfig(bibfile, volume_no, volume_type, volume_prefix)
     ha.tap { |hs| hs.delete('address') }
     ha.tap { |hs| hs.delete('conference_url') }
     ha.tap { |hs| hs.delete('name') }
-    
+
     ha['analytics'] = {'google' => {'tracking_id' => self.tracking_id}}
     ha['orig_bibfile'] = bibfile
     return ha
@@ -547,11 +549,11 @@ def self.write_volume_files(ha)
     write_index(ha)
     write_readme(ha)
     write_gemfile(ha)
-  end  
+  end
   def self.write_config(ha)
     ya = ha.to_yaml(:ExplicitTypes => true)
 
-    out = File.open('_config.yml', 'w')    
+    out = File.open('_config.yml', 'w')
     out.puts ya
     out.puts "# Site settings"
     out.puts "# Original source:  " + ha['orig_bibfile']
@@ -564,7 +566,7 @@ def self.write_index(ha)
     out.puts "---"
   end
   def self.write_gemfile(ha)
-  
+
     out = File.open('Gemfile', 'w')
     # frozen_string_literal: true
     out.puts 'source "https://rubygems.org"'
@@ -588,7 +590,7 @@ def self.write_readme(ha)
     out = File.open('README.md', 'w')
     readme = ''
     readme += "\n\nPublished as " + ha['volume_type'] + " " + ha['volume'] + " by the Proceedings of Machine Learning Research on #{ha['published'].strftime('%d %B %Y')}." + "\n"
-    
+
     if ha.has_key?('editor')
       readme += "\nVolume Edited by:\n"
       for name in ha['editor']
@@ -609,7 +611,7 @@ def self.write_readme(ha)
     out.puts '# PMLR ' + ha['volume']
     out.puts
     out.puts 'To suggest fixes to this volume please make a pull request containing the changes requested and a justification for the changes.'
-    out.puts 
+    out.puts
     out.puts 'To edit the details of this conference work edit the [_config.yml](./_config.yml) file and submit a pull request.'
     out.puts
     out.puts 'To make changes to the individual paper details, edit the associated paper file in the [./_posts](./_posts) subdirectory.'
@@ -619,7 +621,6 @@ def self.write_readme(ha)
     out.puts 'For details of what is required to submit a proceedings please check https://proceedings.mlr.press/spec.html'
     out.puts
     out.puts readme
-    
-  end  
-end
 
+  end
+end

From 1234cf3261afca7f0a084d745bfa7fdf2048f525 Mon Sep 17 00:00:00 2001
From: Markus Kaiser <markus.kaiser@siemens.com>
Date: Tue, 17 May 2022 14:00:05 +0100
Subject: [PATCH 2/2] Add a comment explaining why cicd_mode skips PDF handling

---
 ruby/mlresearch.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ruby/mlresearch.rb b/ruby/mlresearch.rb
index 3b0a487..06e277f 100644
--- a/ruby/mlresearch.rb
+++ b/ruby/mlresearch.rb
@@ -351,6 +351,7 @@ def self.extractpapers(bib_file, volume_no, volume_info, software_file=nil, vide
       #puts ha['author'][0]['family'] + published.year.to_s.slice(-2,-1) + 'a'
       #puts ha['id']
 
+      # cicd_mode skips all PDF handling (moving files, setting ha['pdf'], the missing-PDF error), since the PDFs will generally not be available.
       if not cicd_mode
         # True for volumes that didn't necessarily conform to original layout
         inc_layout = ([27..53] + [55..56] + [63..64]).include?(volume_no.to_i)