From d531fccfd7abdac4942d186cd55c57570fe23dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Pikul?= Date: Thu, 11 Apr 2024 21:16:56 +0200 Subject: [PATCH 1/2] Adding SQL-like join functions for dicts --- funcy/colls.py | 20 +++++++++++++++++++- tests/test_colls.py | 13 +++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/funcy/colls.py b/funcy/colls.py index 6acfd53..ee1512c 100644 --- a/funcy/colls.py +++ b/funcy/colls.py @@ -17,7 +17,9 @@ 'is_distinct', 'all', 'any', 'none', 'one', 'some', 'zipdict', 'flip', 'project', 'omit', 'zip_values', 'zip_dicts', 'where', 'pluck', 'pluck_attr', 'invoke', 'lwhere', 'lpluck', 'lpluck_attr', 'linvoke', - 'get_in', 'get_lax', 'set_in', 'update_in', 'del_in', 'has_path'] + 'get_in', 'get_lax', 'set_in', 'update_in', 'del_in', 'has_path', + 'inner_join', 'left_join', 'right_join', 'full_join' + ] ### Generic ops @@ -360,3 +362,19 @@ def invoke(objects, name, *args, **kwargs): """Yields results of the obj.name(*args, **kwargs) for each object in objects.""" return map(methodcaller(name, *args, **kwargs), objects) + +def inner_join(left, right, left_column, right_column): + """Returns a list of merged dicts, one per pair of left and right rows whose left_column and right_column values are equal.""" + return [join([lrow, rrow]) for lrow in left for rrow in right if left_column in lrow and right_column in rrow and lrow[left_column] == rrow[right_column]] + +def left_join(left, right, left_column, right_column): + """Returns inner_join() results followed by the left rows that have no matching right row, kept as is.""" + return inner_join(left, right, left_column, right_column) + [lrow for lrow in left if not any(left_column in lrow and right_column in rrow and lrow[left_column] == rrow[right_column] for rrow in right)] + +def right_join(left, right, left_column, right_column): + """Returns the same as left_join() with the sides swapped: right rows with no matching left row are kept as is.""" + return left_join(right, left, right_column, left_column) + +def full_join(left, right, left_column, right_column): + """Returns inner_join() results followed by unmatched right rows and then unmatched left rows, both kept as is.""" + return inner_join(left, right, left_column,
right_column) + [rrow for rrow in right if not any(left_column in lrow and right_column in rrow and lrow[left_column] == rrow[right_column] for lrow in left)] + [lrow for lrow in left if not any(left_column in lrow and right_column in rrow and lrow[left_column] == rrow[right_column] for rrow in right)] diff --git a/tests/test_colls.py b/tests/test_colls.py index dc39d8c..452689b 100644 --- a/tests/test_colls.py +++ b/tests/test_colls.py @@ -347,3 +347,16 @@ def test_pluck_attr(): def test_invoke(): assert linvoke(['abc', 'def', 'b'], 'find', 'b') == [1, -1, 0] + + +def test_inner_join(): + assert inner_join([{'a': 1, 'b': 2}, {'a': 3, 'b': 4},{'e':1,'f':2}], [{'c': 1, 'd': 2}, {'c': 3, 'd': 4},{"g":1,"h":2}], 'a', 'c') == [{'a': 1, 'b': 2, 'c': 1, 'd': 2}, {'a': 3, 'b': 4, 'c': 3, 'd': 4}] + +def test_left_join(): + assert left_join([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}, {'e':1,'f':2}], [{'c': 1, 'd': 2}, {'c': 3, 'd': 4},{"g":1,"h":2}], 'a', 'c') == [{'a': 1, 'b': 2, 'c': 1, 'd': 2}, {'a': 3, 'b': 4, 'c': 3, 'd': 4}, {'a': 5, 'b': 6}, {'e': 1, 'f': 2}] + +def test_right_join(): + assert right_join([{'a': 1, 'b': 2}, {'a': 3, 'b': 4},{'e':1,'f':2}], [{'c': 1, 'd': 2}, {'c': 3, 'd': 4}, {'c': 5, 'd': 6}, {"g":1,"h":2}], 'a', 'c') == [{'a': 1, 'b': 2, 'c': 1, 'd': 2}, {'a': 3, 'b': 4, 'c': 3, 'd': 4}, {'c': 5, 'd': 6}, {'g': 1, 'h': 2}] + +def test_full_join(): + assert full_join([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}, {'a': 5, 'b': 6}, {'e':1,'f':2}], [{'c': 1, 'd': 2}, {'c': 3, 'd': 4}, {'c': 7, 'd': 8}, {"g":1,"h":2}], 'a', 'c') == [{'a': 1, 'b': 2, 'c': 1, 'd': 2}, {'a': 3, 'b': 4, 'c': 3, 'd': 4}, {'c': 7, 'd': 8}, {'g': 1, 'h': 2}, {'a': 5, 'b': 6}, {'e': 1, 'f': 2}] \ No newline at end of file From 4914da2f04d73b0ebd8294e9dcaea508a2518709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Pikul?= Date: Thu, 11 Apr 2024 21:29:41 +0200 Subject: [PATCH 2/2] Adding docs for joins --- docs/colls.rst | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/colls.rst b/docs/colls.rst index 7ea45be..0c5c201 100644 --- a/docs/colls.rst +++ b/docs/colls.rst @@ -313,6 +313,49 @@ Data manipulation invoke(['abc', 'def', 'b'], 'find', 'b') # ->[1, -1, 0] +.. 
function:: inner_join(left, right, left_column, right_column) + + Returns a list of merged dicts, one for each pair of matching rows from the two tables. The tables are expected to be lists of dictionaries, and the columns are the keys in the dictionaries. + + For example:: + + left = [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}] + right = [{'id': 1, 'age': 25}, {'id': 3, 'age': 30}] + inner_join(left, right, 'id', 'id') + # -> [{'id': 1, 'name': 'Alice', 'age': 25}] + +.. function:: left_join(left, right, left_column, right_column) + + Returns a list of merged dicts for the matching rows from the two tables. The tables are expected to be lists of dictionaries, and the columns are the keys in the dictionaries. If a row in the left table does not have a match in the right table, it is kept as is, with the right table columns omitted. + + For example:: + + left = [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}] + right = [{'id': 1, 'age': 25}, {'id': 3, 'age': 30}] + left_join(left, right, 'id', 'id') + # -> [{'id': 1, 'name': 'Alice', 'age': 25}, {'id': 2, 'name': 'Bob'}] + +.. function:: right_join(left, right, left_column, right_column) + + Returns a list of merged dicts for the matching rows from the two tables. The tables are expected to be lists of dictionaries, and the columns are the keys in the dictionaries. If a row in the right table does not have a match in the left table, it is kept as is, with the left table columns omitted. + + For example:: + + left = [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}] + right = [{'id': 1, 'age': 25}, {'id': 3, 'age': 30}] + right_join(left, right, 'id', 'id') + # -> [{'id': 1, 'name': 'Alice', 'age': 25}, {'id': 3, 'age': 30}] + +.. function:: full_join(left, right, left_column, right_column) + + Returns a list of merged dicts for the matching rows from the two tables. The tables are expected to be lists of dictionaries, and the columns are the keys in the dictionaries.
If a row in either table does not have a match in the other table, it is kept as is, with the columns from the other table omitted. Unmatched right rows come before unmatched left rows. + + For example:: + + left = [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}] + right = [{'id': 1, 'age': 25}, {'id': 3, 'age': 30}] + full_join(left, right, 'id', 'id') + # -> [{'id': 1, 'name': 'Alice', 'age': 25}, {'id': 3, 'age': 30}, {'id': 2, 'name': 'Bob'}] Content tests -------------