Skip to content

Commit

Permalink
Include the relevant internal link "context" in the key, not the meta…
Browse files Browse the repository at this point in the history
…data

* See gjtorikian#695 (comment)
* The new combined key prevents using the metadata to store the information, and uniquely identifies what is relevant for checking internal links.
* This aligns internal and external collection and cache.
* The combined key needs to be serialized to a string when writing the cache to JSON and parsed when reading.
  • Loading branch information
riccardoporreca committed Jan 9, 2022
1 parent b56343f commit 837ef9e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 53 deletions.
39 changes: 14 additions & 25 deletions lib/html_proofer/cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,20 @@ def parsed_timeframe(timeframe)
end
end

def add_internal(url, metadata, found)
def add_internal(url, context, metadata, found)
return unless enabled?

@cache_log[:internal][url] = { time: @cache_time, metadata: [] } if @cache_log[:internal][url].nil?

@cache_log[:internal][url][:metadata] << construct_internal_link_metadata(metadata, found)
cache_key = { link: url, context: context }
@cache_log[:internal][cache_key] = { time: @cache_time.to_s, found: found, metadata: metadata }
end

def add_external(url, filenames, status_code, msg)
def add_external(url, metadata, status_code, msg)
return unless enabled?

found = status_code.between?(200, 299)

clean_url = cleaned_url(url)
@cache_log[:external][clean_url] = { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: filenames }
@cache_log[:external][clean_url] = { time: @cache_time.to_s, found: found, status_code: status_code, message: msg, metadata: metadata }
end

def detect_url_changes(urls_detected, type)
Expand All @@ -82,27 +81,13 @@ def detect_url_changes(urls_detected, type)
additions
end

private def construct_internal_link_metadata(metadata, found)
{
source: metadata[:source],
current_path: metadata[:current_path],
line: metadata[:line],
base_url: metadata[:base_url],
found: found
}
end

# prepare to add new URLs detected
private def determine_additions(urls_detected, type)
additions = urls_detected.reject do |url, metadata|
if @cache_log[type].include?(url)

# if this is false, we're trying again
found = if type == :external
@cache_log[type][url][:found]
else
@cache_log[type][url][:metadata].all? { |m| m[:found] }
end
found = @cache_log[type][url][:found]
if found
# update the cached metadata (there might be more locations found cached for the link)
@cache_log[type][url][:metadata] = metadata
Expand Down Expand Up @@ -146,15 +131,19 @@ def detect_url_changes(urls_detected, type)
def write
return unless enabled?

File.write(@cache_file, @cache_log.to_json)
cache_write = @cache_log
cache_write[:internal].transform_keys!(&:to_json)
File.write(@cache_file, cache_write.to_json)
end

def retrieve_urls(urls_detected, type)
# if there are no urls, bail
return {} if urls_detected.empty?

urls_detected = urls_detected.transform_keys do |url|
cleaned_url(url)
if type == :external
urls_detected = urls_detected.transform_keys do |url|
cleaned_url(url)
end
end

urls_to_check = detect_url_changes(urls_detected, type)
Expand Down Expand Up @@ -205,7 +194,7 @@ def size(type)
elsif cache_version != CACHE_VERSION
# if cache version is newer...do something
else
log[:internal] = log[:internal].transform_keys(&:to_s)
log[:internal] = log[:internal].transform_keys { |key| JSON.parse(key.to_s, symbolize_names: true) }
log[:external] = log[:external].transform_keys(&:to_s)
log
end
Expand Down
16 changes: 8 additions & 8 deletions lib/html_proofer/check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,16 @@ def self.short_name
def add_to_internal_urls(url, line)
url_string = url.raw_attribute

@internal_urls[url_string] = [] if @internal_urls[url_string].nil?

metadata = {
context = {
source: @runner.current_source,
current_path: @runner.current_path,
line: line,
base_url: base_url,
found: nil
path: @runner.current_path,
base_url: base_url
}
@internal_urls[url_string] << metadata
url_key = { link: url_string, context: context }

@internal_urls[url_key] = [] if @internal_urls[url_key].nil?

@internal_urls[url_key] << { filename: @runner.current_path, line: line }
end

def add_to_external_urls(url, line)
Expand Down
39 changes: 19 additions & 20 deletions lib/html_proofer/url_validator/internal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,26 @@ def validate
end

def run_internal_link_checker(links)
links.each_pair do |link, matched_files|
matched_files.each do |metadata|
url = HTMLProofer::Attribute::Url.new(@runner, link, base_url: metadata[:base_url])

@runner.current_source = metadata[:source]
@runner.current_path = metadata[:current_path]

unless file_exists?(url)
@failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}, which does not exist", line: metadata[:line], status: nil, content: nil)
@cache.add_internal(url.to_s, metadata, false)
next
end

unless hash_exists?(url)
@failed_checks << Failure.new(@runner.current_path, 'Links > Internal', "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: metadata[:line], status: nil, content: nil)
@cache.add_internal(url.to_s, metadata, false)
next
end

@cache.add_internal(url.to_s, metadata, true)
links.each_pair do |link, metadata|
context = link[:context]
url = HTMLProofer::Attribute::Url.new(@runner, link[:link], base_url: context[:base_url])

@runner.current_source = context[:source]
@runner.current_path = context[:path]

unless file_exists?(url)
metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > Internal', "internally linking to #{url}, which does not exist", line: m[:line]) }
@cache.add_internal(url.to_s, context, metadata, false)
next
end

unless hash_exists?(url)
metadata.each { |m| @failed_checks << Failure.new(m[:filename], 'Links > Internal', "internally linking to #{url}; the file exists, but the hash '#{url.hash}' does not", line: m[:line]) }
@cache.add_internal(url.to_s, context, metadata, false)
next
end

@cache.add_internal(url.to_s, context, metadata, true)
end

@failed_checks
Expand Down

0 comments on commit 837ef9e

Please sign in to comment.