diff --git a/2_DataCleaningAndWrangling/Data_Wrangling_WHO.ipynb b/2_DataCleaningAndWrangling/Data_Wrangling_WHO.ipynb
index 2bc5f6a..355e034 100644
--- a/2_DataCleaningAndWrangling/Data_Wrangling_WHO.ipynb
+++ b/2_DataCleaningAndWrangling/Data_Wrangling_WHO.ipynb
@@ -131,7 +131,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -171,14 +171,14 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"# Read the file from my computer\n",
"#NOTE: Change the path to match yours\n",
- "my_path = \"/Users/mary-tziraki/git/TeamCoders_Event_Based_Model/2_DataCleaningAndWrangling/who_suicide_statistics_modified3.csv\"\n",
- "data_raw = pd.read_csv(my_path, header=None)"
+ "my_path = \"./who_suicide_statistics_modified3.csv\"\n",
+ "data_raw = pd.read_csv(my_path)"
]
},
{
@@ -190,7 +190,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 107,
"metadata": {
"scrolled": true
},
@@ -216,30 +216,19 @@
" \n",
" \n",
" | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 3 | \n",
- " 4 | \n",
- " 5 | \n",
- " 6 | \n",
- " 7 | \n",
+ " country | \n",
+ " year | \n",
+ " sex | \n",
+ " age | \n",
+ " suicides_no | \n",
+ " population | \n",
+ " HDI for year | \n",
+ " gdp_for_year ($) | \n",
"
\n",
" \n",
"
\n",
" \n",
" | 0 | \n",
- " country | \n",
- " year | \n",
- " sex | \n",
- " age | \n",
- " suicides_no | \n",
- " population | \n",
- " HDI for year | \n",
- " gdp_for_year ($) | \n",
- "
\n",
- " \n",
- " | 1 | \n",
" Albania | \n",
" 1987 | \n",
" male | \n",
@@ -250,7 +239,7 @@
" 2,156,624,900 | \n",
"
\n",
" \n",
- " | 2 | \n",
+ " 1 | \n",
" Albania | \n",
" 1987 | \n",
" male | \n",
@@ -261,7 +250,7 @@
" 2,156,624,900 | \n",
"
\n",
" \n",
- " | 3 | \n",
+ " 2 | \n",
" Albania | \n",
" 1987 | \n",
" female | \n",
@@ -272,7 +261,7 @@
" 2,156,624,900 | \n",
"
\n",
" \n",
- " | 4 | \n",
+ " 3 | \n",
" Albania | \n",
" 1987 | \n",
" male | \n",
@@ -282,33 +271,125 @@
" NaN | \n",
" 2,156,624,900 | \n",
"
\n",
+ " \n",
+ " | 4 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " male | \n",
+ " 25-34 years | \n",
+ " 9 | \n",
+ " 274300 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 27835 | \n",
+ " Belgium | \n",
+ " 2011 | \n",
+ " female | \n",
+ " 25-34 years | \n",
+ " 6 | \n",
+ " 707535 | \n",
+ " 0.886 | \n",
+ " 527,008,453,887 | \n",
+ "
\n",
+ " \n",
+ " | 27836 | \n",
+ " Thailand | \n",
+ " 2016 | \n",
+ " male | \n",
+ " 75+ years | \n",
+ " 152 | \n",
+ " 1124052 | \n",
+ " NaN | \n",
+ " 411,755,164,833 | \n",
+ "
\n",
+ " \n",
+ " | 27837 | \n",
+ " Netherlands | \n",
+ " 1998 | \n",
+ " female | \n",
+ " 15-24 years | \n",
+ " 21 | \n",
+ " 934500 | \n",
+ " NaN | \n",
+ " 432,476,116,419 | \n",
+ "
\n",
+ " \n",
+ " | 27838 | \n",
+ " Grenada | \n",
+ " 2002 | \n",
+ " female | \n",
+ " 5-14 years | \n",
+ " NaN | \n",
+ " 11760 | \n",
+ " NaN | \n",
+ " 540,336,926 | \n",
+ "
\n",
+ " \n",
+ " | 27839 | \n",
+ " Mexico | \n",
+ " 1988 | \n",
+ " female | \n",
+ " 75+ years | \n",
+ " 7 | \n",
+ " 614000 | \n",
+ " NaN | \n",
+ " 183,144,164,357 | \n",
+ "
\n",
" \n",
"\n",
+ "27840 rows × 8 columns
\n",
""
],
"text/plain": [
- " 0 1 2 3 4 5 6 \\\n",
- "0 country year sex age suicides_no population HDI for year \n",
- "1 Albania 1987 male 15-24 years 21 312900 NaN \n",
- "2 Albania 1987 male 35-54 years 16 308000 NaN \n",
- "3 Albania 1987 female 15-24 years 14 289700 NaN \n",
- "4 Albania 1987 male 75+ years 1 21800 NaN \n",
+ " country year sex age suicides_no population \\\n",
+ "0 Albania 1987 male 15-24 years 21 312900 \n",
+ "1 Albania 1987 male 35-54 years 16 308000 \n",
+ "2 Albania 1987 female 15-24 years 14 289700 \n",
+ "3 Albania 1987 male 75+ years 1 21800 \n",
+ "4 Albania 1987 male 25-34 years 9 274300 \n",
+ "... ... ... ... ... ... ... \n",
+ "27835 Belgium 2011 female 25-34 years 6 707535 \n",
+ "27836 Thailand 2016 male 75+ years 152 1124052 \n",
+ "27837 Netherlands 1998 female 15-24 years 21 934500 \n",
+ "27838 Grenada 2002 female 5-14 years NaN 11760 \n",
+ "27839 Mexico 1988 female 75+ years 7 614000 \n",
"\n",
- " 7 \n",
- "0 gdp_for_year ($) \n",
- "1 2,156,624,900 \n",
- "2 2,156,624,900 \n",
- "3 2,156,624,900 \n",
- "4 2,156,624,900 "
+ " HDI for year gdp_for_year ($) \n",
+ "0 NaN 2,156,624,900 \n",
+ "1 NaN 2,156,624,900 \n",
+ "2 NaN 2,156,624,900 \n",
+ "3 NaN 2,156,624,900 \n",
+ "4 NaN 2,156,624,900 \n",
+ "... ... ... \n",
+ "27835 0.886 527,008,453,887 \n",
+ "27836 NaN 411,755,164,833 \n",
+ "27837 NaN 432,476,116,419 \n",
+ "27838 NaN 540,336,926 \n",
+ "27839 NaN 183,144,164,357 \n",
+ "\n",
+ "[27840 rows x 8 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "data_raw.head()"
+ "data_raw"
]
},
{
@@ -1317,7 +1398,32 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "country year sex age suicides_no population HDI for year gdp_for_year ($) \n",
+ "True True True True True True False True 16357\n",
+ " True True 7218\n",
+ " False True False True 3115\n",
+ " True True 1150\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_raw.notnull().value_counts()  # True = value present; counts each distinct presence pattern across the columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
"metadata": {
"scrolled": true
},
@@ -1364,18 +1470,17 @@
}
],
"source": [
- "for column in missing_data.columns.values.tolist():\n",
+ "missing_data = data_raw.isnull()\n",
+ "for column in data_raw.columns:\n",
" print(column)\n",
- " print (missing_data[column].value_counts())\n",
- " print(\"\") "
+ " print(missing_data[column].value_counts())\n",
+ " print() "
]
},
{
"cell_type": "code",
"execution_count": 10,
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -1440,6 +1545,409 @@
"A regular expression is a special sequence of characters that helps you match or find other strings or sets of strings, using a specialized syntax held in a pattern."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "suicides = data_raw['suicides_no']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Null', 'Unknown'], dtype=object)"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_raw.loc[data_raw['suicides_no'].str.isdigit().eq(False), 'suicides_no'].unique()  # text markers only; NaN rows never equal False, so they are skipped\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "Unable to parse string \"Null\" at position 0",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Null\"",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/tmp/ipykernel_91720/1399709450.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msuicides2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuicides\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[1;32m 4355\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat64\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4356\u001b[0m \"\"\"\n\u001b[0;32m-> 4357\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mSeriesApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert_dtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4358\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4359\u001b[0m def _reduce(\n",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1096\u001b[0m \u001b[0;31m# List[Union[Callable[..., Any], str]]]]]\"; expected\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1097\u001b[0m \u001b[0;31m# \"Callable[[Any], Any]\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1098\u001b[0;31m mapped = lib.map_infer(\n\u001b[0m\u001b[1;32m 1099\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1100\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# type: ignore[arg-type]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/core/tools/numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[0;34m(arg, errors, downcast)\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0mcoerce_numeric\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 183\u001b[0;31m values, _ = lib.maybe_convert_numeric(\n\u001b[0m\u001b[1;32m 184\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcoerce_numeric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcoerce_numeric\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 185\u001b[0m )\n",
+ "\u001b[0;32m~/Documents/.config/packman/miniconda3/envs/teaching/lib/python3.8/site-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: Unable to parse string \"Null\" at position 0"
+ ]
+ }
+ ],
+ "source": [
+ "suicides2 = suicides.apply(pd.to_numeric)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "suicides2 = pd.to_numeric(suicides, errors='coerce')  # one vectorised call; entries that cannot be parsed become NaN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ },
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nan 'Null' 'Unknown']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data_raw.loc[suicides2.isna(), 'suicides_no'].unique())  # raw values behind every NaN in suicides2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ages = data_raw['age'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_ages = {'5-14 years': 0, \n",
+ " '15-24 years': 1,\n",
+ " '25-34 years': 2, \n",
+ " '35-54 years': 3,\n",
+ " '55-74 years': 4,\n",
+ " '75+ years': 5}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_raw['age_index'] = data_raw['age'].map(map_ages)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_raw['age_min'] = pd.to_numeric(data_raw['age'].str.extract(r'([0-9]+)', expand=False))  # leading digits = lower bound of the age band; written to a new column so the 'age' labels stay intact and the cell can be re-run"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " country | \n",
+ " year | \n",
+ " sex | \n",
+ " age | \n",
+ " suicides_no | \n",
+ " population | \n",
+ " HDI for year | \n",
+ " gdp_for_year ($) | \n",
+ " age_index | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 9 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " female | \n",
+ " 5-14 years | \n",
+ " NaN | \n",
+ " 311000 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " female | \n",
+ " 15-24 years | \n",
+ " 14 | \n",
+ " 289700 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " female | \n",
+ " 25-34 years | \n",
+ " 4 | \n",
+ " 257200 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " female | \n",
+ " 35-54 years | \n",
+ " 6 | \n",
+ " 278800 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " Albania | \n",
+ " 1987 | \n",
+ " female | \n",
+ " 55-74 years | \n",
+ " NaN | \n",
+ " 144600 | \n",
+ " NaN | \n",
+ " 2,156,624,900 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 27812 | \n",
+ " Uzbekistan | \n",
+ " 2014 | \n",
+ " male | \n",
+ " 15-24 years | \n",
+ " 347 | \n",
+ " 3126905 | \n",
+ " 0.675 | \n",
+ " 63,067,077,179 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 27809 | \n",
+ " Uzbekistan | \n",
+ " 2014 | \n",
+ " male | \n",
+ " 25-34 years | \n",
+ " 318 | \n",
+ " 2739150 | \n",
+ " 0.675 | \n",
+ " 63,067,077,179 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 27808 | \n",
+ " Uzbekistan | \n",
+ " 2014 | \n",
+ " male | \n",
+ " 35-54 years | \n",
+ " 519 | \n",
+ " 3421300 | \n",
+ " 0.675 | \n",
+ " 63,067,077,179 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " | 27811 | \n",
+ " Uzbekistan | \n",
+ " 2014 | \n",
+ " male | \n",
+ " 55-74 years | \n",
+ " 144 | \n",
+ " 1271111 | \n",
+ " 0.675 | \n",
+ " 63,067,077,179 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | 27813 | \n",
+ " Uzbekistan | \n",
+ " 2014 | \n",
+ " male | \n",
+ " 75+ years | \n",
+ " 17 | \n",
+ " 224995 | \n",
+ " 0.675 | \n",
+ " 63,067,077,179 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
27840 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " country year sex age suicides_no population \\\n",
+ "9 Albania 1987 female 5-14 years NaN 311000 \n",
+ "2 Albania 1987 female 15-24 years 14 289700 \n",
+ "7 Albania 1987 female 25-34 years 4 257200 \n",
+ "6 Albania 1987 female 35-54 years 6 278800 \n",
+ "10 Albania 1987 female 55-74 years NaN 144600 \n",
+ "... ... ... ... ... ... ... \n",
+ "27812 Uzbekistan 2014 male 15-24 years 347 3126905 \n",
+ "27809 Uzbekistan 2014 male 25-34 years 318 2739150 \n",
+ "27808 Uzbekistan 2014 male 35-54 years 519 3421300 \n",
+ "27811 Uzbekistan 2014 male 55-74 years 144 1271111 \n",
+ "27813 Uzbekistan 2014 male 75+ years 17 224995 \n",
+ "\n",
+ " HDI for year gdp_for_year ($) age_index \n",
+ "9 NaN 2,156,624,900 0 \n",
+ "2 NaN 2,156,624,900 1 \n",
+ "7 NaN 2,156,624,900 2 \n",
+ "6 NaN 2,156,624,900 3 \n",
+ "10 NaN 2,156,624,900 4 \n",
+ "... ... ... ... \n",
+ "27812 0.675 63,067,077,179 1 \n",
+ "27809 0.675 63,067,077,179 2 \n",
+ "27808 0.675 63,067,077,179 3 \n",
+ "27811 0.675 63,067,077,179 4 \n",
+ "27813 0.675 63,067,077,179 5 \n",
+ "\n",
+ "[27840 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_raw.sort_values(['country', 'year', 'sex', 'age_index'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'18'"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "this_var = \"18-33 age\"\n",
+ "this_var[:2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_raw['age'].str.extract"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_raw['age'].str.extract"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_raw['age'].str.find"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 12,
@@ -2743,9 +3251,7 @@
{
"cell_type": "code",
"execution_count": 21,
- "metadata": {
- "scrolled": false
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -5007,7 +5513,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -5021,7 +5527,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.8"
+ "version": "3.8.12"
}
},
"nbformat": 4,