{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Fitting with scikit-learn - Part 2\n", "==========================\n", "
Overview
\n", "Questions:
\n", "Objectives:
\n", "Keypoints:
\n", "\n", " | plate | \n", "row | \n", "col | \n", "base | \n", "base_cas_number | \n", "base_smiles | \n", "ligand | \n", "ligand_cas_number | \n", "ligand_smiles | \n", "aryl_halide_number | \n", "aryl_halide | \n", "aryl_halide_smiles | \n", "additive_number | \n", "additive | \n", "additive_smiles | \n", "product_smiles | \n", "yield | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "1 | \n", "P2Et | \n", "165535-45-5 | \n", "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC | \n", "XPhos | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "1.0 | \n", "1-chloro-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(Cl)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "26.888615 | \n", "
1 | \n", "1 | \n", "1 | \n", "2 | \n", "P2Et | \n", "165535-45-5 | \n", "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC | \n", "XPhos | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "2.0 | \n", "1-bromo-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(Br)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "24.063224 | \n", "
2 | \n", "1 | \n", "1 | \n", "3 | \n", "P2Et | \n", "165535-45-5 | \n", "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC | \n", "XPhos | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "3.0 | \n", "1-iodo-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(I)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "47.515821 | \n", "
3 | \n", "1 | \n", "1 | \n", "4 | \n", "P2Et | \n", "165535-45-5 | \n", "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC | \n", "XPhos | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "4.0 | \n", "1-chloro-4-methoxybenzene | \n", "COc1ccc(Cl)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "COc1ccc(Nc2ccc(C)cc2)cc1 | \n", "2.126831 | \n", "
4 | \n", "1 | \n", "1 | \n", "5 | \n", "P2Et | \n", "165535-45-5 | \n", "CN(C)P(N(C)C)(N(C)C)=NP(N(C)C)(N(C)C)=NCC | \n", "XPhos | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "5.0 | \n", "1-bromo-4-methoxybenzene | \n", "COc1ccc(Br)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "COc1ccc(Nc2ccc(C)cc2)cc1 | \n", "47.586354 | \n", "
\n", " | additive_*C3_NMR_shift | \n", "additive_*C3_electrostatic_charge | \n", "additive_*C4_NMR_shift | \n", "additive_*C4_electrostatic_charge | \n", "additive_*C5_NMR_shift | \n", "additive_*C5_electrostatic_charge | \n", "additive_*N1_electrostatic_charge | \n", "additive_*O1_electrostatic_charge | \n", "additive_E_HOMO | \n", "additive_E_LUMO | \n", "... | \n", "ligand_V6_intensity | \n", "ligand_V7_frequency | \n", "ligand_V7_intensity | \n", "ligand_V8_frequency | \n", "ligand_V8_intensity | \n", "ligand_V9_frequency | \n", "ligand_V9_intensity | \n", "ligand_dipole_moment | \n", "plate | \n", "row | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "4.414 | \n", "3026.561 | \n", "16.577 | \n", "3043.097 | \n", "18.145 | \n", "3064.344 | \n", "38.21 | \n", "1.212924 | \n", "1 | \n", "1 | \n", "
1 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "4.414 | \n", "3026.561 | \n", "16.577 | \n", "3043.097 | \n", "18.145 | \n", "3064.344 | \n", "38.21 | \n", "1.212924 | \n", "1 | \n", "1 | \n", "
2 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "4.414 | \n", "3026.561 | \n", "16.577 | \n", "3043.097 | \n", "18.145 | \n", "3064.344 | \n", "38.21 | \n", "1.212924 | \n", "1 | \n", "1 | \n", "
3 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "4.414 | \n", "3026.561 | \n", "16.577 | \n", "3043.097 | \n", "18.145 | \n", "3064.344 | \n", "38.21 | \n", "1.212924 | \n", "1 | \n", "1 | \n", "
4 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "4.414 | \n", "3026.561 | \n", "16.577 | \n", "3043.097 | \n", "18.145 | \n", "3064.344 | \n", "38.21 | \n", "1.212924 | \n", "1 | \n", "1 | \n", "
5 rows × 123 columns
\n", "\n", " | additive_*C3_NMR_shift | \n", "additive_*C3_electrostatic_charge | \n", "additive_*C4_NMR_shift | \n", "additive_*C4_electrostatic_charge | \n", "additive_*C5_NMR_shift | \n", "additive_*C5_electrostatic_charge | \n", "additive_*N1_electrostatic_charge | \n", "additive_*O1_electrostatic_charge | \n", "additive_E_HOMO | \n", "additive_E_LUMO | \n", "... | \n", "ligand_cas_number | \n", "ligand_smiles | \n", "aryl_halide_number | \n", "aryl_halide | \n", "aryl_halide_smiles | \n", "additive_number | \n", "additive | \n", "additive_smiles | \n", "product_smiles | \n", "yield | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "1.0 | \n", "1-chloro-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(Cl)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "26.888615 | \n", "
1 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "2.0 | \n", "1-bromo-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(Br)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "24.063224 | \n", "
2 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "3.0 | \n", "1-iodo-4-(trifluoromethyl)benzene | \n", "FC(F)(F)c1ccc(I)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Cc1ccc(Nc2ccc(C(F)(F)F)cc2)cc1 | \n", "47.515821 | \n", "
3 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "4.0 | \n", "1-chloro-4-methoxybenzene | \n", "COc1ccc(Cl)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "COc1ccc(Nc2ccc(C)cc2)cc1 | \n", "2.126831 | \n", "
4 | \n", "143.12 | \n", "0.223 | \n", "93.06 | \n", "-0.447 | \n", "162.34 | \n", "0.292 | \n", "-0.334 | \n", "-0.057 | \n", "-0.2317 | \n", "-0.0487 | \n", "... | \n", "564483-18-7 | \n", "CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)... | \n", "5.0 | \n", "1-bromo-4-methoxybenzene | \n", "COc1ccc(Br)cc1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "COc1ccc(Nc2ccc(C)cc2)cc1 | \n", "47.586354 | \n", "
5 rows × 137 columns
\n", "