Skip to content

Commit

Permalink
Add files via upload
Browse files (browse the repository at this point in the history)
  • Loading branch information
Christopher-Thornton authored Aug 19, 2020
1 parent 912bbbc commit 6d7a293
Showing 1 changed file with 4 additions and 15 deletions.
19 changes: 4 additions & 15 deletions dev/model_building.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"alt_names = pd.read_csv('name_pairs.txt', sep=\",\", header=None)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"alt_names.columns = ['name_a', 'name_b']"
"alt_names = pd.read_csv('name_pairs.txt', sep=\",\", names=['name_a', 'name_b'], header=None)"
]
},
{
Expand Down Expand Up @@ -187,8 +178,7 @@
"import unidecode\n",
"from fuzzywuzzy import fuzz\n",
"\n",
"from syllable_tokenizer import SyllableTokenizer\n",
"ST = SyllableTokenizer()\n",
"import syllable_tokenizer\n",
"\n",
"from abydos.distance import (IterativeSubString, BISIM, DiscountedLevenshtein, Prefix, LCSstr, MLIPNS, Strcmp95,\n",
"MRA, Editex, SAPS, FlexMetric, JaroWinkler, HigueraMico, Sift4, Eudex, ALINE, Covington, PhoneticEditDistance)\n",
Expand Down Expand Up @@ -288,9 +278,8 @@
" df['name_b'] = df.apply(lambda row: re.sub(\n",
" '[^a-zA-Z]+', '', unidecode.unidecode(row['b']).lower().strip()), axis=1)\n",
" \n",
" df['syll_a'] = df.apply(lambda row: ST.tokenize(row.name_a), axis=1)\n",
" df['syll_a'] = df.apply(lambda row: ST.tokenize(row.name_a), axis=1)\n",
" df['syll_b'] = df.apply(lambda row: ST.tokenize(row.name_b), axis=1)\n",
" df['syll_a'] = df.apply(lambda row: syllable_tokenizer.tokenize(row.name_a), axis=1)\n",
" df['syll_b'] = df.apply(lambda row: syllable_tokenizer.tokenize(row.name_b), axis=1)\n",
" \n",
" df['partial'] = df.apply(lambda row: fuzz.partial_ratio(row.syll_a,row.syll_b), axis=1)\n",
" df['tkn_sort'] = df.apply(lambda row: fuzz.token_sort_ratio(row.syll_a,row.syll_b), axis=1)\n",
Expand Down

0 comments on commit 6d7a293

Please sign in to comment.