From c8e1c05a7183d460167773d7fd970f3177b94f69 Mon Sep 17 00:00:00 2001
From: PAlena <86962990+PAlena@users.noreply.github.com>
Date: Tue, 2 Apr 2024 20:51:14 +0200
Subject: [PATCH] Add files via upload
---
Seminar_3/seminar_3_solved.ipynb | 3097 ++++++++++++++++++++++++++++++
1 file changed, 3097 insertions(+)
create mode 100644 Seminar_3/seminar_3_solved.ipynb
diff --git a/Seminar_3/seminar_3_solved.ipynb b/Seminar_3/seminar_3_solved.ipynb
new file mode 100644
index 0000000..ae7000f
--- /dev/null
+++ b/Seminar_3/seminar_3_solved.ipynb
@@ -0,0 +1,3097 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "492eb907-b454-403f-914a-281a61751e07",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import pandas as pd\n",
+ "import time"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b9babd9-e6ad-49bd-9e5e-c479363d305b",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Seminar - APIs, DBs and Live coding"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d7941ab-1155-4c89-8095-94edf11f889d",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Task 1: Requesting API\n",
+ "### 1a. Create a function requesting data from sreality\n",
+ "\n",
+ "\n",
+ "```python\n",
+ "base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page=60&page={}'.format(i)\n",
+ "\n",
+ "r = requests.get(base_url)\n",
+ "d = r.json()\n",
+ "```\n",
+ "\n",
+ "* function should parametrize: \n",
+ " * `category_main_cb` - `{'flat':1, 'house':2, 'land':3 }`\n",
+ " * `category_type_cb` - `{'sell':1,'rent':2}`\n",
+ " * `locality_region_id` - use 10 as default value\n",
+ " * `page` parameter\n",
+ "* use string inputs for `category_main_cb` and `category_type_cb`\n",
+ "* include try/except clause to handle errors\n",
+ "* function should return JSON data in python types\n",
+ "* do not forget to sleep for at least 0.5 s before each request"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "3bf7c9dc-be77-48ad-b373-c1525983da7a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hi\n",
+ "CPU times: user 74 µs, sys: 15 µs, total: 89 µs\n",
+ "Wall time: 99.2 µs\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "print('Hi')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "8d075a9e-e094-483d-9a9f-2fd5a7eae194",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 1.6 ms, sys: 1.12 ms, total: 2.72 ms\n",
+ "Wall time: 5.01 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "time.sleep(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "b8758ddd-8357-4b98-b9ac-e8cf0c3629b4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "CPU times: user 2.35 ms, sys: 0 ns, total: 2.35 ms\n",
+ "Wall time: 1 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "time.sleep(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "55cca328-f8c4-4fba-a0b3-6d20c990d712",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def request_sreality(page, category_main_str, category_type_str, locality_region_id=10):\n",
+ "    \"\"\"Request one page of estate listings from the sreality API.\n",
+ "\n",
+ "    Args:\n",
+ "        page: Number of the result page to request.\n",
+ "        category_main_str: One of 'flat', 'house' or 'land'.\n",
+ "        category_type_str: One of 'sell' or 'rent'.\n",
+ "        locality_region_id: Value sent as `locality_region_id` (default 10).\n",
+ "\n",
+ "    Returns:\n",
+ "        The decoded JSON response as Python types, or None when anything\n",
+ "        went wrong (the error is printed in that case).\n",
+ "    \"\"\"\n",
+ "    time.sleep(0.5)  # wait at least 0.5 s before every request\n",
+ "    category_mains = {'flat':1, 'house':2, 'land':3 }\n",
+ "    category_types = {'sell':1,'rent':2}\n",
+ "    # `per_page` needs its `=`; written as `per_page60` the page size is never sent\n",
+ "    template_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main}&category_type_cb={category_type}&locality_region_id={locality_region_id}&per_page=60&page={page}'\n",
+ "    try:\n",
+ "        # the dictionary lookups stay inside the try so an unknown category is reported, not raised\n",
+ "        request_url = template_url.format(\n",
+ "            category_main=category_mains[category_main_str],\n",
+ "            category_type=category_types[category_type_str],\n",
+ "            locality_region_id=locality_region_id,\n",
+ "            page=page\n",
+ "        )\n",
+ "        # without a timeout a stalled connection would block the notebook forever\n",
+ "        r = requests.get(request_url, timeout=10)\n",
+ "        r.raise_for_status()  # treat HTTP 4xx/5xx as a failure instead of parsing the error body\n",
+ "        return r.json()\n",
+ "    except Exception as e:\n",
+ "        print(e)\n",
+ "        return None\n",
+ "\n",
+ "d = request_sreality(0, 'flat', 'sell', 10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "1a40af97-3f19-4a45-ab62-13ed539b6c86",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "768f1e7e-5330-4f55-b846-7bd02252d45b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'5045 realit v nabídce prodej bytů Praha. Vyberte si novou nemovitost na sreality.cz s hledáním na mapě a velkými náhledy fotografií nabízených bytů.'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d['meta_description']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "78942b2d-e947-47e0-8dc2-fe333f7c31dc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5045"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d['result_size']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "32a6da59-18de-4ce0-8127-873f6ec29a0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['estates', 'is_saved', 'not_precise_location_count'])"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d['_embedded'].keys()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b718701-e4a4-4fe4-bf34-d03913765b2a",
+ "metadata": {},
+ "source": [
+ "### 1b. Create a function converting sreality json data into pandas dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "c1eae9b3-0571-4699-9933-3868f362ef83",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "21"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(d['_embedded']['estates'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "1c60d718-0c53-4b00-9de3-234c72e938ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "27"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(d['_embedded']['estates'][4].keys())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "99e70448-9475-465b-a85c-e4bc5cd1778a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " a | \n",
+ " b | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 44 | \n",
+ " 443 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 14 | \n",
+ " 4454 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 45554 | \n",
+ " 55553 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " a b\n",
+ "0 44 443\n",
+ "1 14 4454\n",
+ "2 45554 55553"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_lists = [{'a':44, 'b':443},{'a':14, 'b':4454},{'a':45554, 'b':55553}]\n",
+ "pd.DataFrame(data_lists)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "71e52613-8e0e-4b5a-a579-76d803eafa31",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def convert_sreality_data_to_df(sreality_data):\n",
+ "    \"\"\"Build a DataFrame from the estates list of a sreality response.\n",
+ "\n",
+ "    Reads `sreality_data['_embedded']['estates']` and passes it to\n",
+ "    `pd.DataFrame`.\n",
+ "    \"\"\"\n",
+ "    estates = sreality_data['_embedded']['estates']\n",
+ "    return pd.DataFrame(estates)\n",
+ "\n",
+ "raw = convert_sreality_data_to_df(d)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "219610c5-fea8-487c-8682-2e803d1fc2d1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " labelsReleased | \n",
+ " has_panorama | \n",
+ " labels | \n",
+ " is_auction | \n",
+ " labelsAll | \n",
+ " seo | \n",
+ " exclusively_at_rk | \n",
+ " category | \n",
+ " has_floor_plan | \n",
+ " _embedded | \n",
+ " ... | \n",
+ " hash_id | \n",
+ " attractive_offer | \n",
+ " price | \n",
+ " price_czk | \n",
+ " _links | \n",
+ " rus | \n",
+ " name | \n",
+ " region_tip | \n",
+ " gps | \n",
+ " has_matterport_url | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " [[balcony, parking_lots, garage], []] | \n",
+ " 0 | \n",
+ " [Balkon, Parkování, Garáž] | \n",
+ " False | \n",
+ " [[personal, balcony, brick, elevator, parking_... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 8, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 58234188 | \n",
+ " 0 | \n",
+ " 12862000 | \n",
+ " {'value_raw': 12862000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 4+kk 128 m² | \n",
+ " 2693402 | \n",
+ " {'lat': 50.12603618747833, 'lon': 14.561554812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " [[not_furnished], [restaurant, drugstore]] | \n",
+ " 0 | \n",
+ " [Nevybavený, Restaurace 1 min. pěšky, Lékárna ... | \n",
+ " False | \n",
+ " [[new_building, personal, elevator, not_furnis... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 2, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 89429068 | \n",
+ " 0 | \n",
+ " 3990000 | \n",
+ " {'value_raw': 3990000, 'unit': '', 'name': 'Ce... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 1+kk 24 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.09041518747833, 'lon': 14.531943812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " [[], []] | \n",
+ " 0 | \n",
+ " [] | \n",
+ " False | \n",
+ " [[new_building, personal, brick, cellar, eleva... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 6, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 567759948 | \n",
+ " 0 | \n",
+ " 21978000 | \n",
+ " {'value_raw': 21978000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 3+kk 122 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.06292218747833, 'lon': 14.381577812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " [[], []] | \n",
+ " 0 | \n",
+ " [] | \n",
+ " False | \n",
+ " [[new_building, personal, brick, cellar, eleva... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 6, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 618091596 | \n",
+ " 0 | \n",
+ " 18559000 | \n",
+ " {'value_raw': 18559000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 3+kk 103 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.06292218747833, 'lon': 14.381577812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " [[], [kindergarten, drugstore]] | \n",
+ " 0 | \n",
+ " [Školka 6 min. pěšky, Lékárna 5 min. pěšky] | \n",
+ " False | \n",
+ " [[new_building, personal, brick], [candy_shop,... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 8, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 973042764 | \n",
+ " 0 | \n",
+ " 21876000 | \n",
+ " {'value_raw': 21876000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 4+kk 139 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.06782018747833, 'lon': 14.507568812... | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " labelsReleased has_panorama \\\n",
+ "0 [[balcony, parking_lots, garage], []] 0 \n",
+ "1 [[not_furnished], [restaurant, drugstore]] 0 \n",
+ "2 [[], []] 0 \n",
+ "3 [[], []] 0 \n",
+ "4 [[], [kindergarten, drugstore]] 0 \n",
+ "\n",
+ " labels is_auction \\\n",
+ "0 [Balkon, Parkování, Garáž] False \n",
+ "1 [Nevybavený, Restaurace 1 min. pěšky, Lékárna ... False \n",
+ "2 [] False \n",
+ "3 [] False \n",
+ "4 [Školka 6 min. pěšky, Lékárna 5 min. pěšky] False \n",
+ "\n",
+ " labelsAll \\\n",
+ "0 [[personal, balcony, brick, elevator, parking_... \n",
+ "1 [[new_building, personal, elevator, not_furnis... \n",
+ "2 [[new_building, personal, brick, cellar, eleva... \n",
+ "3 [[new_building, personal, brick, cellar, eleva... \n",
+ "4 [[new_building, personal, brick], [candy_shop,... \n",
+ "\n",
+ " seo exclusively_at_rk \\\n",
+ "0 {'category_main_cb': 1, 'category_sub_cb': 8, ... 0 \n",
+ "1 {'category_main_cb': 1, 'category_sub_cb': 2, ... 0 \n",
+ "2 {'category_main_cb': 1, 'category_sub_cb': 6, ... 0 \n",
+ "3 {'category_main_cb': 1, 'category_sub_cb': 6, ... 0 \n",
+ "4 {'category_main_cb': 1, 'category_sub_cb': 8, ... 0 \n",
+ "\n",
+ " category has_floor_plan \\\n",
+ "0 1 0 \n",
+ "1 1 1 \n",
+ "2 1 1 \n",
+ "3 1 1 \n",
+ "4 1 1 \n",
+ "\n",
+ " _embedded ... hash_id \\\n",
+ "0 {'favourite': {'is_favourite': False, '_links'... ... 58234188 \n",
+ "1 {'favourite': {'is_favourite': False, '_links'... ... 89429068 \n",
+ "2 {'favourite': {'is_favourite': False, '_links'... ... 567759948 \n",
+ "3 {'favourite': {'is_favourite': False, '_links'... ... 618091596 \n",
+ "4 {'favourite': {'is_favourite': False, '_links'... ... 973042764 \n",
+ "\n",
+ " attractive_offer price \\\n",
+ "0 0 12862000 \n",
+ "1 0 3990000 \n",
+ "2 0 21978000 \n",
+ "3 0 18559000 \n",
+ "4 0 21876000 \n",
+ "\n",
+ " price_czk \\\n",
+ "0 {'value_raw': 12862000, 'unit': '', 'name': 'C... \n",
+ "1 {'value_raw': 3990000, 'unit': '', 'name': 'Ce... \n",
+ "2 {'value_raw': 21978000, 'unit': '', 'name': 'C... \n",
+ "3 {'value_raw': 18559000, 'unit': '', 'name': 'C... \n",
+ "4 {'value_raw': 21876000, 'unit': '', 'name': 'C... \n",
+ "\n",
+ " _links rus \\\n",
+ "0 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "1 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "2 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "3 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "4 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "\n",
+ " name region_tip \\\n",
+ "0 Prodej bytu 4+kk 128 m² 2693402 \n",
+ "1 Prodej bytu 1+kk 24 m² 0 \n",
+ "2 Prodej bytu 3+kk 122 m² 0 \n",
+ "3 Prodej bytu 3+kk 103 m² 0 \n",
+ "4 Prodej bytu 4+kk 139 m² 0 \n",
+ "\n",
+ " gps has_matterport_url \n",
+ "0 {'lat': 50.12603618747833, 'lon': 14.561554812... False \n",
+ "1 {'lat': 50.09041518747833, 'lon': 14.531943812... False \n",
+ "2 {'lat': 50.06292218747833, 'lon': 14.381577812... False \n",
+ "3 {'lat': 50.06292218747833, 'lon': 14.381577812... False \n",
+ "4 {'lat': 50.06782018747833, 'lon': 14.507568812... True \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc2cde54-c6c3-4baf-9e4c-b740d8eb4dbd",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### 1c. Link function `1b` into function `1a`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "e8da8611-df45-4f30-87d6-8059f61f810d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " labelsReleased | \n",
+ " has_panorama | \n",
+ " labels | \n",
+ " is_auction | \n",
+ " labelsAll | \n",
+ " seo | \n",
+ " exclusively_at_rk | \n",
+ " category | \n",
+ " has_floor_plan | \n",
+ " _embedded | \n",
+ " ... | \n",
+ " hash_id | \n",
+ " attractive_offer | \n",
+ " price | \n",
+ " price_czk | \n",
+ " _links | \n",
+ " rus | \n",
+ " name | \n",
+ " region_tip | \n",
+ " gps | \n",
+ " has_matterport_url | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " [[new_building, garage], []] | \n",
+ " 0 | \n",
+ " [Novostavba, Garáž] | \n",
+ " False | \n",
+ " [[new_building, personal, terrace, elevator, p... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 6, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 568091724 | \n",
+ " 0 | \n",
+ " 21760000 | \n",
+ " {'value_raw': 21760000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 3+kk 123 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.06301418747833, 'lon': 14.376991812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " [[], [post_office, medic]] | \n",
+ " 0 | \n",
+ " [Pošta 6 min. pěšky, Lékař 6 min. pěšky] | \n",
+ " False | \n",
+ " [[personal, brick], [candy_shop, small_shop, t... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 4, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 2946720844 | \n",
+ " 0 | \n",
+ " 24335000 | \n",
+ " {'value_raw': 24335000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 2+kk 160 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.07837518747833, 'lon': 14.436064812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " [[], [metro, shop]] | \n",
+ " 0 | \n",
+ " [Metro 5 min. pěšky, Obchod 5 min. pěšky] | \n",
+ " False | \n",
+ " [[personal, balcony, cellar, elevator, parking... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 6, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 400340300 | \n",
+ " 0 | \n",
+ " 14034000 | \n",
+ " {'value_raw': 14034000, 'unit': '', 'name': 'C... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 3+kk 108 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.03316718747833, 'lon': 14.336494812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " [[after_reconstruction], [metro, shop]] | \n",
+ " 0 | \n",
+ " [Po rekonstrukci, Metro 2 min. pěšky, Obchod 3... | \n",
+ " False | \n",
+ " [[personal, after_reconstruction, brick, parki... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 2, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 1144341580 | \n",
+ " 0 | \n",
+ " 7017000 | \n",
+ " {'value_raw': 7017000, 'unit': '', 'name': 'Ce... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 1+kk 39 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.05947018747833, 'lon': 14.419744812... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " [[], [post_office]] | \n",
+ " 0 | \n",
+ " [Pošta 6 min. pěšky] | \n",
+ " False | \n",
+ " [[personal, terrace, elevator], [small_shop, t... | \n",
+ " {'category_main_cb': 1, 'category_sub_cb': 4, ... | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " {'favourite': {'is_favourite': False, '_links'... | \n",
+ " ... | \n",
+ " 1627543372 | \n",
+ " 0 | \n",
+ " 8694000 | \n",
+ " {'value_raw': 8694000, 'unit': '', 'name': 'Ce... | \n",
+ " {'dynamicDown': [{'href': 'https://d18-a.sdn.c... | \n",
+ " False | \n",
+ " Prodej bytu 2+kk 46 m² | \n",
+ " 0 | \n",
+ " {'lat': 50.092200187478326, 'lon': 14.46233681... | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " labelsReleased has_panorama \\\n",
+ "0 [[new_building, garage], []] 0 \n",
+ "1 [[], [post_office, medic]] 0 \n",
+ "2 [[], [metro, shop]] 0 \n",
+ "3 [[after_reconstruction], [metro, shop]] 0 \n",
+ "4 [[], [post_office]] 0 \n",
+ "\n",
+ " labels is_auction \\\n",
+ "0 [Novostavba, Garáž] False \n",
+ "1 [Pošta 6 min. pěšky, Lékař 6 min. pěšky] False \n",
+ "2 [Metro 5 min. pěšky, Obchod 5 min. pěšky] False \n",
+ "3 [Po rekonstrukci, Metro 2 min. pěšky, Obchod 3... False \n",
+ "4 [Pošta 6 min. pěšky] False \n",
+ "\n",
+ " labelsAll \\\n",
+ "0 [[new_building, personal, terrace, elevator, p... \n",
+ "1 [[personal, brick], [candy_shop, small_shop, t... \n",
+ "2 [[personal, balcony, cellar, elevator, parking... \n",
+ "3 [[personal, after_reconstruction, brick, parki... \n",
+ "4 [[personal, terrace, elevator], [small_shop, t... \n",
+ "\n",
+ " seo exclusively_at_rk \\\n",
+ "0 {'category_main_cb': 1, 'category_sub_cb': 6, ... 0 \n",
+ "1 {'category_main_cb': 1, 'category_sub_cb': 4, ... 0 \n",
+ "2 {'category_main_cb': 1, 'category_sub_cb': 6, ... 0 \n",
+ "3 {'category_main_cb': 1, 'category_sub_cb': 2, ... 0 \n",
+ "4 {'category_main_cb': 1, 'category_sub_cb': 4, ... 0 \n",
+ "\n",
+ " category has_floor_plan \\\n",
+ "0 1 1 \n",
+ "1 1 0 \n",
+ "2 1 0 \n",
+ "3 1 1 \n",
+ "4 1 1 \n",
+ "\n",
+ " _embedded ... hash_id \\\n",
+ "0 {'favourite': {'is_favourite': False, '_links'... ... 568091724 \n",
+ "1 {'favourite': {'is_favourite': False, '_links'... ... 2946720844 \n",
+ "2 {'favourite': {'is_favourite': False, '_links'... ... 400340300 \n",
+ "3 {'favourite': {'is_favourite': False, '_links'... ... 1144341580 \n",
+ "4 {'favourite': {'is_favourite': False, '_links'... ... 1627543372 \n",
+ "\n",
+ " attractive_offer price \\\n",
+ "0 0 21760000 \n",
+ "1 0 24335000 \n",
+ "2 0 14034000 \n",
+ "3 0 7017000 \n",
+ "4 0 8694000 \n",
+ "\n",
+ " price_czk \\\n",
+ "0 {'value_raw': 21760000, 'unit': '', 'name': 'C... \n",
+ "1 {'value_raw': 24335000, 'unit': '', 'name': 'C... \n",
+ "2 {'value_raw': 14034000, 'unit': '', 'name': 'C... \n",
+ "3 {'value_raw': 7017000, 'unit': '', 'name': 'Ce... \n",
+ "4 {'value_raw': 8694000, 'unit': '', 'name': 'Ce... \n",
+ "\n",
+ " _links rus \\\n",
+ "0 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "1 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "2 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "3 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "4 {'dynamicDown': [{'href': 'https://d18-a.sdn.c... False \n",
+ "\n",
+ " name region_tip \\\n",
+ "0 Prodej bytu 3+kk 123 m² 0 \n",
+ "1 Prodej bytu 2+kk 160 m² 0 \n",
+ "2 Prodej bytu 3+kk 108 m² 0 \n",
+ "3 Prodej bytu 1+kk 39 m² 0 \n",
+ "4 Prodej bytu 2+kk 46 m² 0 \n",
+ "\n",
+ " gps has_matterport_url \n",
+ "0 {'lat': 50.06301418747833, 'lon': 14.376991812... False \n",
+ "1 {'lat': 50.07837518747833, 'lon': 14.436064812... False \n",
+ "2 {'lat': 50.03316718747833, 'lon': 14.336494812... False \n",
+ "3 {'lat': 50.05947018747833, 'lon': 14.419744812... False \n",
+ "4 {'lat': 50.092200187478326, 'lon': 14.46233681... False \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def request_sreality(page, category_main_str, category_type_str, locality_region_id=10):\n",
+ "    \"\"\"Request one page of sreality listings and return it as a DataFrame.\n",
+ "\n",
+ "    Args:\n",
+ "        page: Number of the result page to request.\n",
+ "        category_main_str: One of 'flat', 'house' or 'land'.\n",
+ "        category_type_str: One of 'sell' or 'rent'.\n",
+ "        locality_region_id: Value sent as `locality_region_id` (default 10).\n",
+ "\n",
+ "    Returns:\n",
+ "        The result of `convert_sreality_data_to_df` applied to the decoded\n",
+ "        response, or None when the request or the conversion failed (the\n",
+ "        error is printed in that case).\n",
+ "\n",
+ "    Raises:\n",
+ "        KeyError: If `category_main_str` or `category_type_str` is not a known key.\n",
+ "    \"\"\"\n",
+ "    category_mains = {'flat':1, 'house':2, 'land':3 }\n",
+ "    category_types = {'sell':1,'rent':2}\n",
+ "    # `per_page` needs its `=`; written as `per_page60` the page size is never sent\n",
+ "    template_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main}&category_type_cb={category_type}&locality_region_id={locality_region_id}&per_page=60&page={page}'\n",
+ "\n",
+ "    request_url = template_url.format(\n",
+ "        category_main=category_mains[category_main_str],\n",
+ "        category_type=category_types[category_type_str],\n",
+ "        locality_region_id=locality_region_id,\n",
+ "        page=page\n",
+ "    )\n",
+ "\n",
+ "    time.sleep(0.5)  # wait at least 0.5 s before every request\n",
+ "    try:\n",
+ "        # without a timeout a stalled connection would block the notebook forever\n",
+ "        r = requests.get(request_url, timeout=10)\n",
+ "        r.raise_for_status()  # treat HTTP 4xx/5xx as a failure instead of parsing the error body\n",
+ "        return convert_sreality_data_to_df(r.json())\n",
+ "    except Exception as e:\n",
+ "        # Python 3 exceptions have no `.message` attribute; format the exception itself\n",
+ "        print(f'error requesting url {request_url}. Reason: {e}')\n",
+ "        return None\n",
+ "\n",
+ "df = request_sreality(0, 'flat', 'sell', 10)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ebab853-b6f2-4335-b13a-6c3cbba1951b",
+ "metadata": {},
+ "source": [
+ "### 1d. Combining multiple requests into a single df\n",
+ "\n",
+ "* Function should parametrize:\n",
+ " * `start_page` and `end_page`\n",
+ " * request parameters\n",
+ "* construct a list of individual request dfs\n",
+ "* then feed it into `pd.concat` function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "bc61d311-c46a-4aee-a004-8349ec3ce0de",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(21, 27)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "b1d9bef1-7e5c-4648-89a4-6f472968f3c6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "request_sreality"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "284687ef-aba6-4bbf-b7bf-c42dafda4cb4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(103, 27)"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def request_multiply_sreality(start_page, end_page, category_main_str, category_type_str, locality_region_id=10):\n",
+ "    \"\"\"Request pages `start_page`..`end_page` (inclusive) and combine them into one DataFrame.\n",
+ "\n",
+ "    Each page is fetched with `request_sreality`, using the same request\n",
+ "    parameters for every page. Pages for which `request_sreality` returned\n",
+ "    None are skipped.\n",
+ "\n",
+ "    Returns:\n",
+ "        The concatenation of the per-page DataFrames (each page keeps its own\n",
+ "        row index), or an empty DataFrame when no page produced data.\n",
+ "    \"\"\"\n",
+ "    pages = range(start_page, end_page + 1)\n",
+ "    list_of_dfs = [request_sreality(page, category_main_str, category_type_str, locality_region_id) for page in pages]\n",
+ "    loaded_dfs = [page_df for page_df in list_of_dfs if page_df is not None]\n",
+ "    if not loaded_dfs:\n",
+ "        # pd.concat raises ValueError when it has nothing to concatenate\n",
+ "        return pd.DataFrame()\n",
+ "    return pd.concat(loaded_dfs)\n",
+ "\n",
+ "df = request_multiply_sreality(1, 5, 'flat', 'sell',10)\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "cb5b33f7-fce3-4331-9d3e-ecc7b5184253",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# drop=True discards the old per-page index instead of adding it as a column and removing it again\n",
+ "df = df.reset_index(drop=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bdde40e7-f68e-4859-878e-772c112f7355",
+ "metadata": {},
+ "source": [
+ "## Task 2: Cleaning data\n",
+ "\n",
+ "### 2a. Filter columns\n",
+ "* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','exclusively_at_rk']`\n",
+ "* use `.copy()` to avoid `SettingWithCopyWarning` later\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "34d14f44-48f4-4bcd-bac0-ddf282242464",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " locality | \n",
+ " price | \n",
+ " name | \n",
+ " gps | \n",
+ " hash_id | \n",
+ " exclusively_at_rk | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Praha 9 - Kbely | \n",
+ " 12862000 | \n",
+ " Prodej bytu 4+kk 128 m² | \n",
+ " {'lat': 50.12603618747833, 'lon': 14.561554812... | \n",
+ " 58234188 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Praha 2 - Vinohrady | \n",
+ " 21566000 | \n",
+ " Prodej bytu 2+kk 126 m² | \n",
+ " {'lat': 50.06495918747833, 'lon': 14.454340812... | \n",
+ " 4107789900 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Praha 5 - Sobín | \n",
+ " 17382000 | \n",
+ " Prodej bytu 3+kk 97 m² | \n",
+ " {'lat': 50.052054187478326, 'lon': 14.28598081... | \n",
+ " 1972872524 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 18286000 | \n",
+ " Prodej bytu 4+kk 122 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 866350924 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 14140000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 3735254860 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " Praha 8 - Karlín | \n",
+ " 10236000 | \n",
+ " Prodej bytu 1+kk 60 m² | \n",
+ " {'lat': 50.08081318747833, 'lon': 14.459052812... | \n",
+ " 1918002252 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Praha 4 - Michle | \n",
+ " 29614000 | \n",
+ " Prodej bytu 3+kk 272 m² | \n",
+ " {'lat': 50.03685218747833, 'lon': 14.467224812... | \n",
+ " 2810619212 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Praha 4 - Modřany | \n",
+ " 14018000 | \n",
+ " Prodej bytu 3+kk 100 m² | \n",
+ " {'lat': 49.989115187478326, 'lon': 14.41775681... | \n",
+ " 1567020876 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Praha 9 - Kbely | \n",
+ " 11121000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.11815518747833, 'lon': 14.550433812... | \n",
+ " 1684042828 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 11421000 | \n",
+ " Prodej bytu 2+kk 74 m² | \n",
+ " {'lat': 50.03348418747833, 'lon': 14.323431812... | \n",
+ " 347063372 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
103 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " locality price name \\\n",
+ "0 Praha 9 - Kbely 12862000 Prodej bytu 4+kk 128 m² \n",
+ "1 Praha 2 - Vinohrady 21566000 Prodej bytu 2+kk 126 m² \n",
+ "2 Praha 5 - Sobín 17382000 Prodej bytu 3+kk 97 m² \n",
+ "3 Praha 5 - Stodůlky 18286000 Prodej bytu 4+kk 122 m² \n",
+ "4 Praha 5 - Stodůlky 14140000 Prodej bytu 3+kk 88 m² \n",
+ ".. ... ... ... \n",
+ "98 Praha 8 - Karlín 10236000 Prodej bytu 1+kk 60 m² \n",
+ "99 Praha 4 - Michle 29614000 Prodej bytu 3+kk 272 m² \n",
+ "100 Praha 4 - Modřany 14018000 Prodej bytu 3+kk 100 m² \n",
+ "101 Praha 9 - Kbely 11121000 Prodej bytu 3+kk 88 m² \n",
+ "102 Praha 5 - Stodůlky 11421000 Prodej bytu 2+kk 74 m² \n",
+ "\n",
+ " gps hash_id \\\n",
+ "0 {'lat': 50.12603618747833, 'lon': 14.561554812... 58234188 \n",
+ "1 {'lat': 50.06495918747833, 'lon': 14.454340812... 4107789900 \n",
+ "2 {'lat': 50.052054187478326, 'lon': 14.28598081... 1972872524 \n",
+ "3 {'lat': 50.02775118747833, 'lon': 14.324684812... 866350924 \n",
+ "4 {'lat': 50.02775118747833, 'lon': 14.324684812... 3735254860 \n",
+ ".. ... ... \n",
+ "98 {'lat': 50.08081318747833, 'lon': 14.459052812... 1918002252 \n",
+ "99 {'lat': 50.03685218747833, 'lon': 14.467224812... 2810619212 \n",
+ "100 {'lat': 49.989115187478326, 'lon': 14.41775681... 1567020876 \n",
+ "101 {'lat': 50.11815518747833, 'lon': 14.550433812... 1684042828 \n",
+ "102 {'lat': 50.03348418747833, 'lon': 14.323431812... 347063372 \n",
+ "\n",
+ " exclusively_at_rk \n",
+ "0 0 \n",
+ "1 0 \n",
+ "2 1 \n",
+ "3 0 \n",
+ "4 0 \n",
+ ".. ... \n",
+ "98 0 \n",
+ "99 1 \n",
+ "100 0 \n",
+ "101 0 \n",
+ "102 1 \n",
+ "\n",
+ "[103 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# keep only the columns used in the cleaning steps; .copy() gives an independent frame\n",
+ "clean = df.loc[:, ['locality', 'price', 'name', 'gps','hash_id','exclusively_at_rk']].copy()\n",
+ "clean"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "80deec04-4959-4d9a-8a3a-7cf616e8558a",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### 2b: GPS\n",
+ "* Convert dictionary in `gps` column into two columns - `lat` and `lon`\n",
+ "* use apply function on gps column\n",
+ "* Note: `apply` can return multiple columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "68f281f1-5169-47f6-a89d-ed1d9f416a48",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# returning a Series from apply yields one output column per Series label\n",
+ "clean[['lat', 'lon']] = clean['gps'].apply(lambda gps: pd.Series([gps['lat'], gps['lon']], index=['lat', 'lon']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "id": "fbd73a3c-83d5-4b74-8232-58ed33ee1edc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " locality | \n",
+ " price | \n",
+ " name | \n",
+ " gps | \n",
+ " hash_id | \n",
+ " exclusively_at_rk | \n",
+ " lat1 | \n",
+ " lon1 | \n",
+ " lat | \n",
+ " lon | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Praha 9 - Kbely | \n",
+ " 12862000 | \n",
+ " Prodej bytu 4+kk 128 m² | \n",
+ " {'lat': 50.12603618747833, 'lon': 14.561554812... | \n",
+ " 58234188 | \n",
+ " 0 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Praha 2 - Vinohrady | \n",
+ " 21566000 | \n",
+ " Prodej bytu 2+kk 126 m² | \n",
+ " {'lat': 50.06495918747833, 'lon': 14.454340812... | \n",
+ " 4107789900 | \n",
+ " 0 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Praha 5 - Sobín | \n",
+ " 17382000 | \n",
+ " Prodej bytu 3+kk 97 m² | \n",
+ " {'lat': 50.052054187478326, 'lon': 14.28598081... | \n",
+ " 1972872524 | \n",
+ " 1 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 18286000 | \n",
+ " Prodej bytu 4+kk 122 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 866350924 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 14140000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 3735254860 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " Praha 8 - Karlín | \n",
+ " 10236000 | \n",
+ " Prodej bytu 1+kk 60 m² | \n",
+ " {'lat': 50.08081318747833, 'lon': 14.459052812... | \n",
+ " 1918002252 | \n",
+ " 0 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Praha 4 - Michle | \n",
+ " 29614000 | \n",
+ " Prodej bytu 3+kk 272 m² | \n",
+ " {'lat': 50.03685218747833, 'lon': 14.467224812... | \n",
+ " 2810619212 | \n",
+ " 1 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Praha 4 - Modřany | \n",
+ " 14018000 | \n",
+ " Prodej bytu 3+kk 100 m² | \n",
+ " {'lat': 49.989115187478326, 'lon': 14.41775681... | \n",
+ " 1567020876 | \n",
+ " 0 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Praha 9 - Kbely | \n",
+ " 11121000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.11815518747833, 'lon': 14.550433812... | \n",
+ " 1684042828 | \n",
+ " 0 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 11421000 | \n",
+ " Prodej bytu 2+kk 74 m² | \n",
+ " {'lat': 50.03348418747833, 'lon': 14.323431812... | \n",
+ " 347063372 | \n",
+ " 1 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
103 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " locality price name \\\n",
+ "0 Praha 9 - Kbely 12862000 Prodej bytu 4+kk 128 m² \n",
+ "1 Praha 2 - Vinohrady 21566000 Prodej bytu 2+kk 126 m² \n",
+ "2 Praha 5 - Sobín 17382000 Prodej bytu 3+kk 97 m² \n",
+ "3 Praha 5 - Stodůlky 18286000 Prodej bytu 4+kk 122 m² \n",
+ "4 Praha 5 - Stodůlky 14140000 Prodej bytu 3+kk 88 m² \n",
+ ".. ... ... ... \n",
+ "98 Praha 8 - Karlín 10236000 Prodej bytu 1+kk 60 m² \n",
+ "99 Praha 4 - Michle 29614000 Prodej bytu 3+kk 272 m² \n",
+ "100 Praha 4 - Modřany 14018000 Prodej bytu 3+kk 100 m² \n",
+ "101 Praha 9 - Kbely 11121000 Prodej bytu 3+kk 88 m² \n",
+ "102 Praha 5 - Stodůlky 11421000 Prodej bytu 2+kk 74 m² \n",
+ "\n",
+ " gps hash_id \\\n",
+ "0 {'lat': 50.12603618747833, 'lon': 14.561554812... 58234188 \n",
+ "1 {'lat': 50.06495918747833, 'lon': 14.454340812... 4107789900 \n",
+ "2 {'lat': 50.052054187478326, 'lon': 14.28598081... 1972872524 \n",
+ "3 {'lat': 50.02775118747833, 'lon': 14.324684812... 866350924 \n",
+ "4 {'lat': 50.02775118747833, 'lon': 14.324684812... 3735254860 \n",
+ ".. ... ... \n",
+ "98 {'lat': 50.08081318747833, 'lon': 14.459052812... 1918002252 \n",
+ "99 {'lat': 50.03685218747833, 'lon': 14.467224812... 2810619212 \n",
+ "100 {'lat': 49.989115187478326, 'lon': 14.41775681... 1567020876 \n",
+ "101 {'lat': 50.11815518747833, 'lon': 14.550433812... 1684042828 \n",
+ "102 {'lat': 50.03348418747833, 'lon': 14.323431812... 347063372 \n",
+ "\n",
+ " exclusively_at_rk lat1 lon1 lat lon \n",
+ "0 0 50.126036 14.561555 50.126036 14.561555 \n",
+ "1 0 50.064959 14.454341 50.064959 14.454341 \n",
+ "2 1 50.052054 14.285981 50.052054 14.285981 \n",
+ "3 0 50.027751 14.324685 50.027751 14.324685 \n",
+ "4 0 50.027751 14.324685 50.027751 14.324685 \n",
+ ".. ... ... ... ... ... \n",
+ "98 0 50.080813 14.459053 50.080813 14.459053 \n",
+ "99 1 50.036852 14.467225 50.036852 14.467225 \n",
+ "100 0 49.989115 14.417757 49.989115 14.417757 \n",
+ "101 0 50.118155 14.550434 50.118155 14.550434 \n",
+ "102 1 50.033484 14.323432 50.033484 14.323432 \n",
+ "\n",
+ "[103 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 83,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36c22408-c327-4c17-b1b4-de54f63f0627",
+ "metadata": {},
+ "source": [
+ "### 2b. Get flat type from name\n",
+ "* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`\n",
+ "* try picking third word in string\n",
+ "* check meaningfulness using `.value_counts()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "150a551c-f321-408d-b8bc-dee6c2fb2adf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Take the third whitespace-separated word of each listing name.\n",
+    "clean['flat_type'] = clean['name'].map(lambda title: title.split()[2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e233b14-db62-41f8-be82-45c861d62e3e",
+ "metadata": {},
+ "source": [
+ "### 2c. Get area from name\n",
+ "* Naive: select the word before last word\n",
+ "* Then try navigating using the index of `'m²'`\n",
+    "* If this also fails, you will need to use a regex"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "94a6f0cf-2c35-42fa-a518-d1249487da1e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Naive attempt: take the fourth whitespace-separated word of each name (the value stays a string).\n",
+    "clean['area_1'] = clean['name'].map(lambda title: title.split()[3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "acfe4582-583d-42b9-acf8-72ccf97cfad5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Prodej', 'bytu', '4+kk', '128', 'm²']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "4"
+ ]
+ },
+ "execution_count": 108,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n = 'Prodej bytu 4+kk 128 m²'\n",
+ "splited = n.split()\n",
+ "print(splited)\n",
+ "splited.index('m²')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "02903e8f-5ef2-4cc6-bdcc-fb68d88ea3ba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "128"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "int(splited[3])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "150ff188-b6d0-4326-95c3-cfc30e6fcb03",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " locality | \n",
+ " price | \n",
+ " name | \n",
+ " gps | \n",
+ " hash_id | \n",
+ " exclusively_at_rk | \n",
+ " lat1 | \n",
+ " lon1 | \n",
+ " lat | \n",
+ " lon | \n",
+ " flat_type | \n",
+ " area | \n",
+ " area_1 | \n",
+ " area_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Praha 9 - Kbely | \n",
+ " 12862000 | \n",
+ " Prodej bytu 4+kk 128 m² | \n",
+ " {'lat': 50.12603618747833, 'lon': 14.561554812... | \n",
+ " 58234188 | \n",
+ " 0 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ " [Prodej, bytu, 4+kk, 128, m²] | \n",
+ " 128 | \n",
+ " 128 | \n",
+ " 128 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Praha 2 - Vinohrady | \n",
+ " 21566000 | \n",
+ " Prodej bytu 2+kk 126 m² | \n",
+ " {'lat': 50.06495918747833, 'lon': 14.454340812... | \n",
+ " 4107789900 | \n",
+ " 0 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ " [Prodej, bytu, 2+kk, 126, m²] | \n",
+ " 126 | \n",
+ " 126 | \n",
+ " 126 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Praha 5 - Sobín | \n",
+ " 17382000 | \n",
+ " Prodej bytu 3+kk 97 m² | \n",
+ " {'lat': 50.052054187478326, 'lon': 14.28598081... | \n",
+ " 1972872524 | \n",
+ " 1 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ " [Prodej, bytu, 3+kk, 97, m²] | \n",
+ " 97 | \n",
+ " 97 | \n",
+ " 97 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 18286000 | \n",
+ " Prodej bytu 4+kk 122 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 866350924 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " [Prodej, bytu, 4+kk, 122, m²] | \n",
+ " 122 | \n",
+ " 122 | \n",
+ " 122 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 14140000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 3735254860 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " [Prodej, bytu, 3+kk, 88, m²] | \n",
+ " 88 | \n",
+ " 88 | \n",
+ " 88 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " Praha 8 - Karlín | \n",
+ " 10236000 | \n",
+ " Prodej bytu 1+kk 60 m² | \n",
+ " {'lat': 50.08081318747833, 'lon': 14.459052812... | \n",
+ " 1918002252 | \n",
+ " 0 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ " [Prodej, bytu, 1+kk, 60, m²] | \n",
+ " 60 | \n",
+ " 60 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Praha 4 - Michle | \n",
+ " 29614000 | \n",
+ " Prodej bytu 3+kk 272 m² | \n",
+ " {'lat': 50.03685218747833, 'lon': 14.467224812... | \n",
+ " 2810619212 | \n",
+ " 1 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ " [Prodej, bytu, 3+kk, 272, m²] | \n",
+ " 272 | \n",
+ " 272 | \n",
+ " 272 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Praha 4 - Modřany | \n",
+ " 14018000 | \n",
+ " Prodej bytu 3+kk 100 m² | \n",
+ " {'lat': 49.989115187478326, 'lon': 14.41775681... | \n",
+ " 1567020876 | \n",
+ " 0 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ " [Prodej, bytu, 3+kk, 100, m²] | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Praha 9 - Kbely | \n",
+ " 11121000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.11815518747833, 'lon': 14.550433812... | \n",
+ " 1684042828 | \n",
+ " 0 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ " [Prodej, bytu, 3+kk, 88, m²] | \n",
+ " 88 | \n",
+ " 88 | \n",
+ " 88 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 11421000 | \n",
+ " Prodej bytu 2+kk 74 m² | \n",
+ " {'lat': 50.03348418747833, 'lon': 14.323431812... | \n",
+ " 347063372 | \n",
+ " 1 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ " [Prodej, bytu, 2+kk, 74, m²] | \n",
+ " 74 | \n",
+ " 74 | \n",
+ " 74 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
103 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " locality price name \\\n",
+ "0 Praha 9 - Kbely 12862000 Prodej bytu 4+kk 128 m² \n",
+ "1 Praha 2 - Vinohrady 21566000 Prodej bytu 2+kk 126 m² \n",
+ "2 Praha 5 - Sobín 17382000 Prodej bytu 3+kk 97 m² \n",
+ "3 Praha 5 - Stodůlky 18286000 Prodej bytu 4+kk 122 m² \n",
+ "4 Praha 5 - Stodůlky 14140000 Prodej bytu 3+kk 88 m² \n",
+ ".. ... ... ... \n",
+ "98 Praha 8 - Karlín 10236000 Prodej bytu 1+kk 60 m² \n",
+ "99 Praha 4 - Michle 29614000 Prodej bytu 3+kk 272 m² \n",
+ "100 Praha 4 - Modřany 14018000 Prodej bytu 3+kk 100 m² \n",
+ "101 Praha 9 - Kbely 11121000 Prodej bytu 3+kk 88 m² \n",
+ "102 Praha 5 - Stodůlky 11421000 Prodej bytu 2+kk 74 m² \n",
+ "\n",
+ " gps hash_id \\\n",
+ "0 {'lat': 50.12603618747833, 'lon': 14.561554812... 58234188 \n",
+ "1 {'lat': 50.06495918747833, 'lon': 14.454340812... 4107789900 \n",
+ "2 {'lat': 50.052054187478326, 'lon': 14.28598081... 1972872524 \n",
+ "3 {'lat': 50.02775118747833, 'lon': 14.324684812... 866350924 \n",
+ "4 {'lat': 50.02775118747833, 'lon': 14.324684812... 3735254860 \n",
+ ".. ... ... \n",
+ "98 {'lat': 50.08081318747833, 'lon': 14.459052812... 1918002252 \n",
+ "99 {'lat': 50.03685218747833, 'lon': 14.467224812... 2810619212 \n",
+ "100 {'lat': 49.989115187478326, 'lon': 14.41775681... 1567020876 \n",
+ "101 {'lat': 50.11815518747833, 'lon': 14.550433812... 1684042828 \n",
+ "102 {'lat': 50.03348418747833, 'lon': 14.323431812... 347063372 \n",
+ "\n",
+ " exclusively_at_rk lat1 lon1 lat lon \\\n",
+ "0 0 50.126036 14.561555 50.126036 14.561555 \n",
+ "1 0 50.064959 14.454341 50.064959 14.454341 \n",
+ "2 1 50.052054 14.285981 50.052054 14.285981 \n",
+ "3 0 50.027751 14.324685 50.027751 14.324685 \n",
+ "4 0 50.027751 14.324685 50.027751 14.324685 \n",
+ ".. ... ... ... ... ... \n",
+ "98 0 50.080813 14.459053 50.080813 14.459053 \n",
+ "99 1 50.036852 14.467225 50.036852 14.467225 \n",
+ "100 0 49.989115 14.417757 49.989115 14.417757 \n",
+ "101 0 50.118155 14.550434 50.118155 14.550434 \n",
+ "102 1 50.033484 14.323432 50.033484 14.323432 \n",
+ "\n",
+ " flat_type area area_1 area_2 \n",
+ "0 [Prodej, bytu, 4+kk, 128, m²] 128 128 128 \n",
+ "1 [Prodej, bytu, 2+kk, 126, m²] 126 126 126 \n",
+ "2 [Prodej, bytu, 3+kk, 97, m²] 97 97 97 \n",
+ "3 [Prodej, bytu, 4+kk, 122, m²] 122 122 122 \n",
+ "4 [Prodej, bytu, 3+kk, 88, m²] 88 88 88 \n",
+ ".. ... ... ... ... \n",
+ "98 [Prodej, bytu, 1+kk, 60, m²] 60 60 60 \n",
+ "99 [Prodej, bytu, 3+kk, 272, m²] 272 272 272 \n",
+ "100 [Prodej, bytu, 3+kk, 100, m²] 100 100 100 \n",
+ "101 [Prodej, bytu, 3+kk, 88, m²] 88 88 88 \n",
+ "102 [Prodej, bytu, 2+kk, 74, m²] 74 74 74 \n",
+ "\n",
+ "[103 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "def name_to_area(nm):\n",
+    "    '''Return, as an int, the word that precedes the first `m²` word in `nm`.\n",
+    "\n",
+    "    Raises ValueError when `nm` contains no `m²` word or the preceding word is not an integer.\n",
+    "    '''\n",
+    "    words = nm.split()\n",
+    "    unit_position = words.index('m²')\n",
+    "    area_word = words[unit_position - 1]\n",
+    "    return int(area_word)\n",
+ "\n",
+ "clean['area_2'] = clean.name.apply(name_to_area)\n",
+ "clean"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "3bc089d8-eab3-4e85-9f79-ce30291b456c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " locality | \n",
+ " price | \n",
+ " name | \n",
+ " gps | \n",
+ " hash_id | \n",
+ " exclusively_at_rk | \n",
+ " lat1 | \n",
+ " lon1 | \n",
+ " lat | \n",
+ " lon | \n",
+ " flat_type | \n",
+ " area | \n",
+ " area_1 | \n",
+ " area_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [locality, price, name, gps, hash_id, exclusively_at_rk, lat1, lon1, lat, lon, flat_type, area, area_1, area_2]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 115,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean[clean['area_1']==clean['area_2']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "fbc4fd99-9dd9-43d0-9e4b-e49e48556534",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " locality | \n",
+ " price | \n",
+ " name | \n",
+ " gps | \n",
+ " hash_id | \n",
+ " exclusively_at_rk | \n",
+ " lat1 | \n",
+ " lon1 | \n",
+ " lat | \n",
+ " lon | \n",
+ " flat_type | \n",
+ " area | \n",
+ " area_1 | \n",
+ " area_2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Praha 9 - Kbely | \n",
+ " 12862000 | \n",
+ " Prodej bytu 4+kk 128 m² | \n",
+ " {'lat': 50.12603618747833, 'lon': 14.561554812... | \n",
+ " 58234188 | \n",
+ " 0 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ " 50.126036 | \n",
+ " 14.561555 | \n",
+ " [Prodej, bytu, 4+kk, 128, m²] | \n",
+ " 128 | \n",
+ " 128 | \n",
+ " 128 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Praha 2 - Vinohrady | \n",
+ " 21566000 | \n",
+ " Prodej bytu 2+kk 126 m² | \n",
+ " {'lat': 50.06495918747833, 'lon': 14.454340812... | \n",
+ " 4107789900 | \n",
+ " 0 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ " 50.064959 | \n",
+ " 14.454341 | \n",
+ " [Prodej, bytu, 2+kk, 126, m²] | \n",
+ " 126 | \n",
+ " 126 | \n",
+ " 126 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Praha 5 - Sobín | \n",
+ " 17382000 | \n",
+ " Prodej bytu 3+kk 97 m² | \n",
+ " {'lat': 50.052054187478326, 'lon': 14.28598081... | \n",
+ " 1972872524 | \n",
+ " 1 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ " 50.052054 | \n",
+ " 14.285981 | \n",
+ " [Prodej, bytu, 3+kk, 97, m²] | \n",
+ " 97 | \n",
+ " 97 | \n",
+ " 97 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 18286000 | \n",
+ " Prodej bytu 4+kk 122 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 866350924 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " [Prodej, bytu, 4+kk, 122, m²] | \n",
+ " 122 | \n",
+ " 122 | \n",
+ " 122 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 14140000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.02775118747833, 'lon': 14.324684812... | \n",
+ " 3735254860 | \n",
+ " 0 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " 50.027751 | \n",
+ " 14.324685 | \n",
+ " [Prodej, bytu, 3+kk, 88, m²] | \n",
+ " 88 | \n",
+ " 88 | \n",
+ " 88 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " Praha 8 - Karlín | \n",
+ " 10236000 | \n",
+ " Prodej bytu 1+kk 60 m² | \n",
+ " {'lat': 50.08081318747833, 'lon': 14.459052812... | \n",
+ " 1918002252 | \n",
+ " 0 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ " 50.080813 | \n",
+ " 14.459053 | \n",
+ " [Prodej, bytu, 1+kk, 60, m²] | \n",
+ " 60 | \n",
+ " 60 | \n",
+ " 60 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " Praha 4 - Michle | \n",
+ " 29614000 | \n",
+ " Prodej bytu 3+kk 272 m² | \n",
+ " {'lat': 50.03685218747833, 'lon': 14.467224812... | \n",
+ " 2810619212 | \n",
+ " 1 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ " 50.036852 | \n",
+ " 14.467225 | \n",
+ " [Prodej, bytu, 3+kk, 272, m²] | \n",
+ " 272 | \n",
+ " 272 | \n",
+ " 272 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " Praha 4 - Modřany | \n",
+ " 14018000 | \n",
+ " Prodej bytu 3+kk 100 m² | \n",
+ " {'lat': 49.989115187478326, 'lon': 14.41775681... | \n",
+ " 1567020876 | \n",
+ " 0 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ " 49.989115 | \n",
+ " 14.417757 | \n",
+ " [Prodej, bytu, 3+kk, 100, m²] | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " Praha 9 - Kbely | \n",
+ " 11121000 | \n",
+ " Prodej bytu 3+kk 88 m² | \n",
+ " {'lat': 50.11815518747833, 'lon': 14.550433812... | \n",
+ " 1684042828 | \n",
+ " 0 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ " 50.118155 | \n",
+ " 14.550434 | \n",
+ " [Prodej, bytu, 3+kk, 88, m²] | \n",
+ " 88 | \n",
+ " 88 | \n",
+ " 88 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " Praha 5 - Stodůlky | \n",
+ " 11421000 | \n",
+ " Prodej bytu 2+kk 74 m² | \n",
+ " {'lat': 50.03348418747833, 'lon': 14.323431812... | \n",
+ " 347063372 | \n",
+ " 1 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ " 50.033484 | \n",
+ " 14.323432 | \n",
+ " [Prodej, bytu, 2+kk, 74, m²] | \n",
+ " 74 | \n",
+ " 74 | \n",
+ " 74 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
103 rows × 14 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " locality price name \\\n",
+ "0 Praha 9 - Kbely 12862000 Prodej bytu 4+kk 128 m² \n",
+ "1 Praha 2 - Vinohrady 21566000 Prodej bytu 2+kk 126 m² \n",
+ "2 Praha 5 - Sobín 17382000 Prodej bytu 3+kk 97 m² \n",
+ "3 Praha 5 - Stodůlky 18286000 Prodej bytu 4+kk 122 m² \n",
+ "4 Praha 5 - Stodůlky 14140000 Prodej bytu 3+kk 88 m² \n",
+ ".. ... ... ... \n",
+ "98 Praha 8 - Karlín 10236000 Prodej bytu 1+kk 60 m² \n",
+ "99 Praha 4 - Michle 29614000 Prodej bytu 3+kk 272 m² \n",
+ "100 Praha 4 - Modřany 14018000 Prodej bytu 3+kk 100 m² \n",
+ "101 Praha 9 - Kbely 11121000 Prodej bytu 3+kk 88 m² \n",
+ "102 Praha 5 - Stodůlky 11421000 Prodej bytu 2+kk 74 m² \n",
+ "\n",
+ " gps hash_id \\\n",
+ "0 {'lat': 50.12603618747833, 'lon': 14.561554812... 58234188 \n",
+ "1 {'lat': 50.06495918747833, 'lon': 14.454340812... 4107789900 \n",
+ "2 {'lat': 50.052054187478326, 'lon': 14.28598081... 1972872524 \n",
+ "3 {'lat': 50.02775118747833, 'lon': 14.324684812... 866350924 \n",
+ "4 {'lat': 50.02775118747833, 'lon': 14.324684812... 3735254860 \n",
+ ".. ... ... \n",
+ "98 {'lat': 50.08081318747833, 'lon': 14.459052812... 1918002252 \n",
+ "99 {'lat': 50.03685218747833, 'lon': 14.467224812... 2810619212 \n",
+ "100 {'lat': 49.989115187478326, 'lon': 14.41775681... 1567020876 \n",
+ "101 {'lat': 50.11815518747833, 'lon': 14.550433812... 1684042828 \n",
+ "102 {'lat': 50.03348418747833, 'lon': 14.323431812... 347063372 \n",
+ "\n",
+ " exclusively_at_rk lat1 lon1 lat lon \\\n",
+ "0 0 50.126036 14.561555 50.126036 14.561555 \n",
+ "1 0 50.064959 14.454341 50.064959 14.454341 \n",
+ "2 1 50.052054 14.285981 50.052054 14.285981 \n",
+ "3 0 50.027751 14.324685 50.027751 14.324685 \n",
+ "4 0 50.027751 14.324685 50.027751 14.324685 \n",
+ ".. ... ... ... ... ... \n",
+ "98 0 50.080813 14.459053 50.080813 14.459053 \n",
+ "99 1 50.036852 14.467225 50.036852 14.467225 \n",
+ "100 0 49.989115 14.417757 49.989115 14.417757 \n",
+ "101 0 50.118155 14.550434 50.118155 14.550434 \n",
+ "102 1 50.033484 14.323432 50.033484 14.323432 \n",
+ "\n",
+ " flat_type area area_1 area_2 \n",
+ "0 [Prodej, bytu, 4+kk, 128, m²] 128 128 128 \n",
+ "1 [Prodej, bytu, 2+kk, 126, m²] 126 126 126 \n",
+ "2 [Prodej, bytu, 3+kk, 97, m²] 97 97 97 \n",
+ "3 [Prodej, bytu, 4+kk, 122, m²] 122 122 122 \n",
+ "4 [Prodej, bytu, 3+kk, 88, m²] 88 88 88 \n",
+ ".. ... ... ... ... \n",
+ "98 [Prodej, bytu, 1+kk, 60, m²] 60 60 60 \n",
+ "99 [Prodej, bytu, 3+kk, 272, m²] 272 272 272 \n",
+ "100 [Prodej, bytu, 3+kk, 100, m²] 100 100 100 \n",
+ "101 [Prodej, bytu, 3+kk, 88, m²] 88 88 88 \n",
+ "102 [Prodej, bytu, 2+kk, 74, m²] 74 74 74 \n",
+ "\n",
+ "[103 rows x 14 columns]"
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clean[clean['area_1'].astype(int)==clean['area_2']]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ce71f809-7a5a-487e-882a-6aa9c7124727",
+ "metadata": {},
+ "source": [
+ "## Bonus: Convert `labelsAll` into categorical variables\n",
+ "\n",
+ "### Task 4a. Get all possible label names\n",
+ "* deal with nested-list structure\n",
+ "* Hint: try to sum the whole column\n",
+    "* You need to iterate through all labels in all rows and collect the unique label names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "id": "03f3d060-5967-48af-9789-cade7acb715b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['d', 'c']"
+ ]
+ },
+ "execution_count": 141,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "['d'] + ['c']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "id": "073e90c7-bec6-4b04-bba3-cf095cdc65f5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['natural_attraction',\n",
+ " 'kindergarten',\n",
+ " 'tram',\n",
+ " 'movies',\n",
+ " 'cellar',\n",
+ " 'brick',\n",
+ " 'candy_shop',\n",
+ " 'train',\n",
+ " 'metro',\n",
+ " 'bus_public_transport',\n",
+ " 'playground',\n",
+ " 'personal',\n",
+ " 'tavern',\n",
+ " 'loggia',\n",
+ " 'elevator',\n",
+ " 'school',\n",
+ " 'small_shop',\n",
+ " 'parking_lots',\n",
+ " 'partly_furnished',\n",
+ " 'new_building',\n",
+ " 'vet',\n",
+ " 'theater',\n",
+ " 'balcony',\n",
+ " 'not_furnished',\n",
+ " 'shop',\n",
+ " 'medic',\n",
+ " 'post_office',\n",
+ " 'sightseeing',\n",
+ " 'restaurant',\n",
+ " 'in_construction',\n",
+ " 'atm',\n",
+ " 'sports',\n",
+ " 'garage',\n",
+ " 'drugstore']"
+ ]
+ },
+ "execution_count": 152,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Each entry of `labelsAll` is a list of label lists: flatten both levels and de-duplicate.\n",
+    "# Sorting keeps the order identical on every run; iterating a plain `set` of strings can\n",
+    "# come out in a different order after a kernel restart, which would reorder anything built from it.\n",
+    "# Iterating the column directly also avoids the repeated list concatenation done by `.sum()`.\n",
+    "possible_labels = sorted({\n",
+    "    label\n",
+    "    for offer_labels in raw.labelsAll\n",
+    "    for sublist in offer_labels\n",
+    "    for label in sublist\n",
+    "})\n",
+    "possible_labels"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "db0b86aa-57b0-439d-a82f-1d8f962be7c2",
+ "metadata": {},
+ "source": [
+ "### 4b. Test existence of label `cellar` for offers\n",
+    "* again, deal with the nested list-of-lists structure\n",
+ "* write generic function `test_existence_of_label(offer_labels,label)`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "id": "a633c468-e096-46bf-a51e-0f30f11cca26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 False\n",
+ "1 False\n",
+ "2 True\n",
+ "3 True\n",
+ "4 False\n",
+ "5 True\n",
+ "6 True\n",
+ "7 False\n",
+ "8 True\n",
+ "9 True\n",
+ "10 True\n",
+ "11 True\n",
+ "12 True\n",
+ "13 True\n",
+ "14 True\n",
+ "15 True\n",
+ "16 True\n",
+ "17 True\n",
+ "18 True\n",
+ "19 True\n",
+ "20 True\n",
+ "Name: labelsAll, dtype: bool"
+ ]
+ },
+ "execution_count": 163,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "def test_existence_of_label(offer_labels, label):\n",
+    "    '''Return True if `label` equals any item in any sub-list of `offer_labels`.\n",
+    "\n",
+    "    offer_labels: the labels of one offer, given as a list of lists of label names.\n",
+    "    label: the label name to look for.\n",
+    "    '''\n",
+    "    # Compare against the `label` argument: with a hard-coded 'cellar' every label gave the same answer.\n",
+    "    return any(item == label for sublist in offer_labels for item in sublist)\n",
+ "\n",
+ "raw.labelsAll.apply(lambda offer_labels: test_existence_of_label(offer_labels, 'cellar'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d5e22365-b2d8-4c57-a0cd-7297efb8b948",
+ "metadata": {},
+ "source": [
+ "### 4c. Test existence of all possible labels\n",
+ "* use apply returning series with all labels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 164,
+ "id": "8165a5a4-a52c-453a-b3e9-39d868fe5501",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " natural_attraction | \n",
+ " kindergarten | \n",
+ " tram | \n",
+ " movies | \n",
+ " cellar | \n",
+ " brick | \n",
+ " candy_shop | \n",
+ " train | \n",
+ " metro | \n",
+ " bus_public_transport | \n",
+ " ... | \n",
+ " shop | \n",
+ " medic | \n",
+ " post_office | \n",
+ " sightseeing | \n",
+ " restaurant | \n",
+ " in_construction | \n",
+ " atm | \n",
+ " sports | \n",
+ " garage | \n",
+ " drugstore | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " ... | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " ... | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
21 rows × 34 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " natural_attraction kindergarten tram movies cellar brick \\\n",
+ "0 False False False False False False \n",
+ "1 False False False False False False \n",
+ "2 True True True True True True \n",
+ "3 True True True True True True \n",
+ "4 False False False False False False \n",
+ "5 True True True True True True \n",
+ "6 True True True True True True \n",
+ "7 False False False False False False \n",
+ "8 True True True True True True \n",
+ "9 True True True True True True \n",
+ "10 True True True True True True \n",
+ "11 True True True True True True \n",
+ "12 True True True True True True \n",
+ "13 True True True True True True \n",
+ "14 True True True True True True \n",
+ "15 True True True True True True \n",
+ "16 True True True True True True \n",
+ "17 True True True True True True \n",
+ "18 True True True True True True \n",
+ "19 True True True True True True \n",
+ "20 True True True True True True \n",
+ "\n",
+ " candy_shop train metro bus_public_transport ... shop medic \\\n",
+ "0 False False False False ... False False \n",
+ "1 False False False False ... False False \n",
+ "2 True True True True ... True True \n",
+ "3 True True True True ... True True \n",
+ "4 False False False False ... False False \n",
+ "5 True True True True ... True True \n",
+ "6 True True True True ... True True \n",
+ "7 False False False False ... False False \n",
+ "8 True True True True ... True True \n",
+ "9 True True True True ... True True \n",
+ "10 True True True True ... True True \n",
+ "11 True True True True ... True True \n",
+ "12 True True True True ... True True \n",
+ "13 True True True True ... True True \n",
+ "14 True True True True ... True True \n",
+ "15 True True True True ... True True \n",
+ "16 True True True True ... True True \n",
+ "17 True True True True ... True True \n",
+ "18 True True True True ... True True \n",
+ "19 True True True True ... True True \n",
+ "20 True True True True ... True True \n",
+ "\n",
+ " post_office sightseeing restaurant in_construction atm sports \\\n",
+ "0 False False False False False False \n",
+ "1 False False False False False False \n",
+ "2 True True True True True True \n",
+ "3 True True True True True True \n",
+ "4 False False False False False False \n",
+ "5 True True True True True True \n",
+ "6 True True True True True True \n",
+ "7 False False False False False False \n",
+ "8 True True True True True True \n",
+ "9 True True True True True True \n",
+ "10 True True True True True True \n",
+ "11 True True True True True True \n",
+ "12 True True True True True True \n",
+ "13 True True True True True True \n",
+ "14 True True True True True True \n",
+ "15 True True True True True True \n",
+ "16 True True True True True True \n",
+ "17 True True True True True True \n",
+ "18 True True True True True True \n",
+ "19 True True True True True True \n",
+ "20 True True True True True True \n",
+ "\n",
+ " garage drugstore \n",
+ "0 False False \n",
+ "1 False False \n",
+ "2 True True \n",
+ "3 True True \n",
+ "4 False False \n",
+ "5 True True \n",
+ "6 True True \n",
+ "7 False False \n",
+ "8 True True \n",
+ "9 True True \n",
+ "10 True True \n",
+ "11 True True \n",
+ "12 True True \n",
+ "13 True True \n",
+ "14 True True \n",
+ "15 True True \n",
+ "16 True True \n",
+ "17 True True \n",
+ "18 True True \n",
+ "19 True True \n",
+ "20 True True \n",
+ "\n",
+ "[21 rows x 34 columns]"
+ ]
+ },
+ "execution_count": 164,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def existence_of_all_labels(offer_labels, possible_labels):\n",
+ "    \"\"\"Build a Series of per-label results for a single offer.\n",
+ "\n",
+ "    For every entry of `possible_labels`, the value returned by\n",
+ "    `test_existence_of_label(offer_labels, label)` is stored under that\n",
+ "    label, in the iteration order of `possible_labels`.\n",
+ "    \"\"\"\n",
+ "    flags = {}\n",
+ "    for candidate in possible_labels:\n",
+ "        flags[candidate] = test_existence_of_label(offer_labels, candidate)\n",
+ "    return pd.Series(flags)\n",
+ "\n",
+ "# Each `labelsAll` entry becomes one row; each possible label becomes one column.\n",
+ "# NOTE(review): in the saved output all displayed columns are identical within a row;\n",
+ "# confirm that test_existence_of_label really depends on its `label` argument.\n",
+ "raw['labelsAll'].apply(existence_of_all_labels, possible_labels=possible_labels)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8eceb6c0-9af6-4fb9-b178-f371dd453d39",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}