Skip to content

Commit

Permalink
Merge pull request #68 from 4dn-dcic/kmp_C4-102_C4-92_cgap_bucketing
Browse files Browse the repository at this point in the history
CGAP bucketing fixes (C4-92, C4-102)
  • Loading branch information
willronchetti authored Mar 31, 2020
2 parents e00979b + 0a5d77f commit 95c5ff5
Show file tree
Hide file tree
Showing 10 changed files with 401 additions and 75 deletions.
69 changes: 47 additions & 22 deletions dcicutils/beanstalk_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from . import ff_utils
from botocore.exceptions import ClientError
from .misc_utils import PRINT
from .env_utils import is_cgap_env, is_stg_or_prd_env, public_url_mappings, blue_green_mirror_env

logging.basicConfig()
logger = logging.getLogger('logger')
Expand All @@ -35,9 +36,22 @@


FOURSIGHT_URL = 'https://foursight.4dnucleome.org/'
# magic CNAME corresponds to data.4dnucleome
MAGIC_CNAME = 'fourfront-green.us-east-1.elasticbeanstalk.com'
GOLDEN_DB = 'fourfront-production.co3gwj7b7tpq.us-east-1.rds.amazonaws.com'

# FF_MAGIC_CNAME corresponds to data.4dnucleome.org
FF_MAGIC_CNAME = 'fourfront-green.us-east-1.elasticbeanstalk.com'
# CGAP_MAGIC_CNAME corresponds to cgap.hms.harvard.edu
CGAP_MAGIC_CNAME = 'fourfront-cgap.9wzadzju3p.us-east-1.elasticbeanstalk.com'
# The legacy name MAGIC_CNAME is deprecated (retained for backward compatibility until a major release boundary).
MAGIC_CNAME = FF_MAGIC_CNAME

# FF_GOLDEN_DB is the database behind data.4dnucleome.org (and shared by staging.4dnucleome.org)
FF_GOLDEN_DB = 'fourfront-production.co3gwj7b7tpq.us-east-1.rds.amazonaws.com'
# CGAP_GOLDEN_DB is the database behind cgap.hms.harvard.edu
CGAP_GOLDEN_DB = 'fourfront-cgap.co3gwj7b7tpq.us-east-1.rds.amazonaws.com'
# The name GOLDEN_DB is deprecated (retained for backward compatibility until a major release boundary).
# Although not visibly used in this repository, this variable is imported by Torb.
GOLDEN_DB = FF_GOLDEN_DB

REGION = 'us-east-1'


Expand Down Expand Up @@ -206,24 +220,36 @@ def swap_cname(src, dest):
client.restart_app_server(EnvironmentName=dest)


def whodaman():
def _compute_prd_env_for_project(project):
'''
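Determines which ElasticBeanstalk environment is currently hosting the production
site for the given project: cgap.hms.harvard.edu when project is 'cgap', otherwise
data.4dnucleome.org. Requires IAM permissions for EB!
Returns:
str: EB environment name hosting that production site (None if no environment matches)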
'''
magic_cname = CGAP_MAGIC_CNAME if project == 'cgap' else FF_MAGIC_CNAME
client = boto3.client('elasticbeanstalk', region_name=REGION)
res = describe_beanstalk_environments(client, ApplicationName="4dn-web")
logger.info(res)
for env in res['Environments']:
logger.info(env)
if env.get('CNAME') == MAGIC_CNAME:
if env.get('CNAME') == magic_cname:
# we found data
return env.get('EnvironmentName')


def compute_ff_prd_env():
    """
    Determine which ElasticBeanstalk environment is the Fourfront production environment.

    Returns:
        Whatever _compute_prd_env_for_project returns for the 'ff' project.
    """
    ff_project = 'ff'
    return _compute_prd_env_for_project(ff_project)


whodaman = compute_ff_prd_env # This naming is deprecated but retained for compatibility.


def compute_cgap_prd_env():
    """
    Determine which ElasticBeanstalk environment is the CGAP production environment.

    Returns:
        Whatever _compute_prd_env_for_project returns for the 'cgap' project.
    """
    cgap_project = 'cgap'
    return _compute_prd_env_for_project(cgap_project)


def beanstalk_info(env):
"""
Describe an ElasticBeanstalk environment given an environment name
Expand All @@ -242,7 +268,7 @@ def beanstalk_info(env):
def get_beanstalk_real_url(env):
"""
Return the real url for the elasticbeanstalk with given environment name.
Name can be 'data', 'staging', or an actual environment.
Name can be 'cgap', 'data', 'staging', or an actual environment.
Args:
env (str): ElasticBeanstalk environment name
Expand All @@ -251,25 +277,24 @@ def get_beanstalk_real_url(env):
str: url of the ElasticBeanstalk environment
"""
url = ''
urls = {'staging': 'http://staging.4dnucleome.org',
'data': 'https://data.4dnucleome.org'}
urls = public_url_mappings(env)

if env in urls:
if env in urls: # Special case handling of 'cgap', 'data', or 'staging' as an argument.
return urls[env]

# TODO (C4-91): Reconsider environment names.
# This code is too fragile.
if 'webprod' in env or 'blue' in env or 'green' in env:
data_env = whodaman()

if data_env == env:
url = urls['data']
else:
url = urls['staging']
else:
bs_info = beanstalk_info(env)
url = "http://" + bs_info['CNAME']

if is_stg_or_prd_env(env):
# What counts as staging/prod depends on whether we're in the CGAP or Fourfront space.
data_env = compute_cgap_prd_env() if is_cgap_env(env) else compute_ff_prd_env()
# There is only one production environment. Only its blue/green mirror is served at the
# public staging URL (e.g. staging.4dnucleome.org); anything else gets its beanstalk CNAME.
if env == data_env:
return urls['data']
elif env == blue_green_mirror_env(data_env):
# Mirror env might be None, in which case this clause will not be entered
return urls['staging']

bs_info = beanstalk_info(env)
url = "http://" + bs_info['CNAME']
return url


Expand Down
68 changes: 65 additions & 3 deletions dcicutils/env_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,52 @@
# CGAP_ENV_WEBPROD2_NEW is meaningless here. See CGAP_ENV_STAGING_NEW.
CGAP_ENV_WOLF_NEW = 'cgap-wolf' # Maybe not used

# The bucket names were allocated originally and needn't change.

FF_PROD_BUCKET_ENV = FF_ENV_WEBPROD
CGAP_PROD_BUCKET_ENV = CGAP_ENV_WEBPROD

# Done this way to get maximally compatible behavior.
FOURFRONT_STG_OR_PRD_TOKENS = ['webprod', 'blue', 'green']
FOURFRONT_STG_OR_PRD_NAMES = ['staging', 'stagging', 'data']

# Done this way because it's safer going forward.
CGAP_STG_OR_PRD_TOKENS = []
CGAP_STG_OR_PRD_NAMES = [CGAP_ENV_WEBPROD, CGAP_ENV_PRODUCTION_GREEN, CGAP_ENV_PRODUCTION_BLUE]
CGAP_STG_OR_PRD_NAMES = [CGAP_ENV_WEBPROD, CGAP_ENV_PRODUCTION_GREEN, CGAP_ENV_PRODUCTION_BLUE, 'cgap']


FF_PUBLIC_URL_STG = 'http://staging.4dnucleome.org'
FF_PUBLIC_URL_PRD = 'https://data.4dnucleome.org'

FF_PUBLIC_URLS = {
'staging': FF_PUBLIC_URL_STG,
'data': FF_PUBLIC_URL_PRD,
}

CGAP_PUBLIC_URL_STG = 'https://staging.cgap.hms.harvard.edu' # This is a stopgap for testing and may have to change
CGAP_PUBLIC_URL_PRD = 'https://cgap.hms.harvard.edu'

CGAP_PUBLIC_URLS = {
'cgap': CGAP_PUBLIC_URL_PRD,
'data': CGAP_PUBLIC_URL_PRD,
'staging': CGAP_PUBLIC_URL_STG,
}

BEANSTALK_PROD_BUCKET_ENVS = {
'staging': FF_PROD_BUCKET_ENV,
'data': FF_PROD_BUCKET_ENV,
FF_ENV_WEBPROD: FF_PROD_BUCKET_ENV,
FF_ENV_WEBPROD2: FF_PROD_BUCKET_ENV,
FF_ENV_PRODUCTION_BLUE: FF_PROD_BUCKET_ENV,
FF_ENV_PRODUCTION_GREEN: FF_PROD_BUCKET_ENV,
'cgap': CGAP_PROD_BUCKET_ENV,
CGAP_ENV_PRODUCTION_BLUE: CGAP_PROD_BUCKET_ENV,
CGAP_ENV_PRODUCTION_GREEN: CGAP_PROD_BUCKET_ENV,
CGAP_ENV_WEBPROD: CGAP_PROD_BUCKET_ENV,
CGAP_ENV_PRODUCTION_BLUE_NEW: CGAP_PROD_BUCKET_ENV,
CGAP_ENV_PRODUCTION_GREEN_NEW: CGAP_PROD_BUCKET_ENV,
}

# These operate as pairs. Don't add extras.
BEANSTALK_PROD_MIRRORS = {

FF_ENV_PRODUCTION_BLUE: FF_ENV_PRODUCTION_GREEN,
Expand All @@ -61,7 +96,6 @@

}


BEANSTALK_TEST_ENVS = [

FF_ENV_HOTSEAT,
Expand Down Expand Up @@ -95,6 +129,34 @@ def blue_green_mirror_env(envname):
return None


def prod_bucket_env(envname):
    """
    Look up the envname of the production bucket associated with a production-class envname.

    This is intended for use when configuring a beanstalk. The lookup is purely
    declarative (see the BEANSTALK_PROD_BUCKET_ENVS table), so it does not matter
    whether the caller is asking on behalf of CGAP or Fourfront, or whether an old
    or new naming scheme is in use: just pass the current envname.

    Args:
        envname (str): a staging or production env name, i.e. something for which
            is_stg_or_prd_env is expected to return True.

    Returns:
        str: the envname of the associated production bucket, or None if envname
        has no entry in BEANSTALK_PROD_BUCKET_ENVS.
    """
    bucket_env = BEANSTALK_PROD_BUCKET_ENVS.get(envname)
    return bucket_env


def public_url_mappings(envname):
    """
    Return the table of public URLs for the ecosystem in which the envname resides.

    Args:
        envname (str): may be 'cgap', 'data', 'staging', or an environment name.

    Returns:
        dict: CGAP_PUBLIC_URLS if is_cgap_env(envname) is true, otherwise FF_PUBLIC_URLS.
    """
    if is_cgap_env(envname):
        return CGAP_PUBLIC_URLS
    return FF_PUBLIC_URLS


def is_cgap_env(envname):
"""
Returns True if the given string looks like a CGAP elasticbeanstalk environment name.
Expand Down
12 changes: 6 additions & 6 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import boto3
from . import (
s3_utils,
es_utils
es_utils,
env_utils,
)
from .misc_utils import PRINT
import requests
Expand All @@ -20,11 +21,13 @@
from urllib.parse import urlencode


# TODO (C4-92, C4-102): We should probably centralize this information in env_utils. Also figure out relation to CGAP.
HIGLASS_BUCKETS = ['elasticbeanstalk-fourfront-webprod-wfoutput',
'elasticbeanstalk-fourfront-webdev-wfoutput']


# TODO (C4-92): Centralize this information, it is repeated in other repos
# TODO (C4-102): Does this need to include CGAP envs? As part of the same list, or as a separate list?
PRODUCTION_ENVS = ['fourfront-blue', 'fourfront-green']


Expand Down Expand Up @@ -1067,11 +1070,8 @@ def unified_authentication(auth=None, ff_env=None):
"""
# first see if key should be obtained from using ff_env
if not auth and ff_env:
# webprod, webprod2 and blue/green all use the fourfront-webprod bucket for keys
if 'webprod' in ff_env or ff_env in PRODUCTION_ENVS:
use_env = 'fourfront-webprod'
else:
use_env = ff_env
# TODO: The ff_env argument is mis-named, something we should fix sometime. It can be a cgap env, too.
use_env = env_utils.prod_bucket_env(ff_env) if env_utils.is_stg_or_prd_env(ff_env) else ff_env
auth = s3_utils.s3Utils(env=use_env).get_access_keys()
# see if auth is directly from get_access_keys()
use_auth = None
Expand Down
20 changes: 20 additions & 0 deletions dcicutils/qa_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
qa_utils: Tools for use in quality assurance testing.
"""

from .misc_utils import PRINT


def mock_not_called(name):
    """
    Build a stand-in for a function that must not be called during a test.

    Args:
        name: the name of the function being mocked; used in the failure message.

    Returns:
        A function accepting any positional and keyword arguments which, when
        called, prints those arguments and raises an AssertionError reporting
        that the named function was called where not expected.
    """
    def mocked_function(*args, **kwargs):
        # Printing is acceptable here: this only runs in a testing context, just before
        # the test fails, and whoever is running the tests may want to see the arguments.
        PRINT("args=", args)
        PRINT("kwargs=", kwargs)
        complaint = "%s was called where not expected." % name
        raise AssertionError(complaint)

    return mocked_function
4 changes: 2 additions & 2 deletions dcicutils/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from zipfile import ZipFile
from io import BytesIO
import logging
from .env_utils import is_stg_or_prd_env
from .env_utils import is_stg_or_prd_env, prod_bucket_env
from .misc_utils import PRINT


Expand All @@ -33,7 +33,7 @@ def __init__(self, outfile_bucket=None, sys_bucket=None, raw_file_bucket=None,
if env:
if is_stg_or_prd_env(env):
self.url = get_beanstalk_real_url(env)
env = 'fourfront-webprod'
env = prod_bucket_env(env)
# we use standardized naming schema, so s3 buckets always have same prefix
sys_bucket = "elasticbeanstalk-%s-system" % env
outfile_bucket = "elasticbeanstalk-%s-wfoutput" % env
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "0.12.1"
version = "0.13.0"
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["William Ronchetti <[email protected]>"]
license = "MIT"
Expand Down
Loading

0 comments on commit 95c5ff5

Please sign in to comment.