From 779f94753c9b75ae4b9af3d2f7604d5a059f9366 Mon Sep 17 00:00:00 2001 From: Qiao Date: Fri, 25 Oct 2019 10:00:47 -0500 Subject: [PATCH] use defaultdict in buildWordDict --- Chapter09_NaturalLanguages.ipynb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Chapter09_NaturalLanguages.ipynb b/Chapter09_NaturalLanguages.ipynb index 5d90fa0..a44b4e3 100644 --- a/Chapter09_NaturalLanguages.ipynb +++ b/Chapter09_NaturalLanguages.ipynb @@ -18,7 +18,7 @@ "from bs4 import BeautifulSoup\n", "import re\n", "import string\n", - "from collections import Counter\n", + "from collections import Counter, defaultdict\n", "\n", "def cleanSentence(sentence):\n", " sentence = sentence.split(' ')\n", @@ -187,13 +187,8 @@ " # Filter out empty words\n", " words = [word for word in words if word != '']\n", "\n", - " wordDict = {}\n", + " wordDict = defaultdict(lambda: defaultdict(int))\n", " for i in range(1, len(words)):\n", - " if words[i-1] not in wordDict:\n", - " # Create a new dictionary for this word\n", - " wordDict[words[i-1]] = {}\n", - " if words[i] not in wordDict[words[i-1]]:\n", - " wordDict[words[i-1]][words[i]] = 0\n", " wordDict[words[i-1]][words[i]] += 1\n", " return wordDict\n", "\n", @@ -305,8 +300,17 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "source": [], + "metadata": { + "collapsed": false + } + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file