|
46 | 46 | "metadata": {}, |
47 | 47 | "outputs": [], |
48 | 48 | "source": [ |
| 49 | + "# Read file as a list of lines\n", |
49 | 50 | "speech_raw = open(datapath / 'dream.txt').readlines() \n", |
50 | | - "speech = [i.strip() for i in speech_raw]" |
51 | | - ] |
52 | | - }, |
53 | | - { |
54 | | - "cell_type": "code", |
55 | | - "execution_count": null, |
56 | | - "id": "a536464e", |
57 | | - "metadata": {}, |
58 | | - "outputs": [], |
59 | | - "source": [ |
60 | | - "words = []\n", |
61 | | - "for i in speech:\n", |
62 | | - " words.append(i)\n", |
63 | | - "words" |
| 51 | + "speech = [i.strip() for i in speech_raw] # Remove white space around each line" |
64 | 52 | ] |
65 | 53 | }, |
66 | 54 | { |
|
70 | 58 | "metadata": {}, |
71 | 59 | "outputs": [], |
72 | 60 | "source": [ |
73 | | - "words = ' '.join(words)\n", |
74 | | - "words" |
| 61 | + "# join all lines as one big string\n", |
| 62 | + "speech_string = ' '.join(speech)\n", |
| 63 | + "speech_string" |
75 | 64 | ] |
76 | 65 | }, |
77 | 66 | { |
|
82 | 71 | "outputs": [], |
83 | 72 | "source": [ |
84 | 73 | "# put this all in lower case\n", |
85 | | - "words = words.lower()" |
| 74 | + "speech_string = speech_string.lower()" |
86 | 75 | ] |
87 | 76 | }, |
88 | 77 | { |
|
95 | 84 | "# get rid of punctuation using replace and replacing with empty string (e.g. '')\n", |
96 | 85 | "punks = ['.',',',':',';','!','-']\n", |
97 | 86 | "for cp in punks:\n", |
98 | | - " words = words.replace(cp, '')" |
| 87 | + " speech_string = speech_string.replace(cp, '')" |
99 | 88 | ] |
100 | 89 | }, |
101 | 90 | { |
102 | 91 | "cell_type": "code", |
103 | 92 | "execution_count": null, |
104 | 93 | "id": "b755a4ce", |
105 | | - "metadata": {}, |
| 94 | + "metadata": { |
| 95 | + "scrolled": true |
| 96 | + }, |
106 | 97 | "outputs": [], |
107 | 98 | "source": [ |
108 | 99 | "# finally split on \"whitespace\" resulting in a list of words\n", |
109 | | - "words = words.split()\n", |
| 100 | + "words = speech_string.split()\n", |
110 | 101 | "words" |
111 | 102 | ] |
112 | 103 | }, |
|
292 | 283 | } |
293 | 284 | ], |
294 | 285 | "metadata": { |
| 286 | + "kernelspec": { |
| 287 | + "display_name": "Python 3 (ipykernel)", |
| 288 | + "language": "python", |
| 289 | + "name": "python3" |
| 290 | + }, |
295 | 291 | "language_info": { |
296 | | - "name": "python" |
| 292 | + "codemirror_mode": { |
| 293 | + "name": "ipython", |
| 294 | + "version": 3 |
| 295 | + }, |
| 296 | + "file_extension": ".py", |
| 297 | + "mimetype": "text/x-python", |
| 298 | + "name": "python", |
| 299 | + "nbconvert_exporter": "python", |
| 300 | + "pygments_lexer": "ipython3", |
| 301 | + "version": "3.11.10" |
297 | 302 | } |
298 | 303 | }, |
299 | 304 | "nbformat": 4, |
|
0 commit comments