Skip to content

Commit

Permalink
Remove event_id field.
Browse files Browse the repository at this point in the history
  • Loading branch information
anjackson committed Jul 4, 2019
1 parent 5db42bb commit 9bb30e5
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions crawldb/hadoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ def mapper(self, line):

# Yield this line if there seems to be no key collision with the previous line:
if self.last_c and c.ssurt == self.last_c.ssurt and c.timestamp == self.last_c.timestamp:
logger.warning("Skipping line %i because the last line appears to collide with this one.")
logger.warning("Skipping line %i because the last line appears to collide with this one." % self.line_counter)
logger.warning("Prev line %s" % self.last_c.line)
logger.warning("Curr line %s" % c.line)
else:
yield c.upsert_values(self.line_counter)
yield c.upsert_values()

# Remember this line as the last line:
self.last_c = c
Expand Down
6 changes: 3 additions & 3 deletions crawldb/heritrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def stats(self):
"""
return self.stats

# SQL template for upserting rows into the crawl_log table. The parenthesised
# list names the target columns; the trailing %s stands in for the VALUES
# payload (presumably substituted by the database layer - TODO confirm).
# NOTE(review): this is a diff view, so the statement appears twice - first
# with an event_id column, then without it. The two lines are alternatives.
upsert_sql = """UPSERT INTO crawl_log (ssurt, timestamp, event_id, url, host, domain, content_type, content_length, content_digest, via, hop_path, status_code, ip ) VALUES %s"""
upsert_sql = """UPSERT INTO crawl_log (ssurt, timestamp, url, host, domain, content_type, content_length, content_digest, via, hop_path, status_code, ip ) VALUES %s"""

# Return this record's fields as a 13-item tuple for the crawl_log upsert.
# The caller-supplied event_id goes third, after ssurt and timestamp, which is
# the position of the event_id column in the upsert_sql variant that lists it.
# self.mime lines up with content_type and self.hash with content_digest.
# NOTE(review): given the commit title ("Remove event_id field."), this looks
# like the pre-change signature shown by the diff - confirm before relying on it.
def upsert_values(self, event_id):
return (self.ssurt, self.timestamp, event_id, self.url, self.host, self.domain, self.mime, self.content_length, self.hash, self.via, self.hop_path, self.status_code, self.ip)
# Return this record's fields as a 12-item tuple for the crawl_log upsert.
# Values are in the same order as the column list of the upsert_sql variant
# without event_id: ssurt, timestamp, url, host, domain, content_type,
# content_length, content_digest, via, hop_path, status_code, ip.
# self.mime lines up with content_type and self.hash with content_digest.
def upsert_values(self):
return (self.ssurt, self.timestamp, self.url, self.host, self.domain, self.mime, self.content_length, self.hash, self.via, self.hop_path, self.status_code, self.ip)

0 comments on commit 9bb30e5

Please sign in to comment.