diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 7a4aab61..ffd85a25 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -4,10 +4,15 @@ on: [push]
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os.host }}  # runner image is taken from the os matrix entries below
     strategy:
       matrix:
         python-version: ["3.8", "3.9"]
+        os:  # combined with python-version, so every Python version runs on every host
+          - name: ubuntu
+            host: ubuntu-latest  # value consumed by runs-on
+          - name: windows
+            host: windows-latest  # value consumed by runs-on
 
     steps:
       - uses: actions/checkout@v3
@@ -15,12 +20,30 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
+          cache: "pip"
+
+      - name: (ubuntu) Install dependencies
+        if: runner.os != 'windows'  # expression string comparison ignores case; true on every non-Windows runner
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements_dev.txt
           python -m spacy download en_core_web_sm
           pip install -e ."[test]"
-      - name: Test with pytest
+      - name: (ubuntu) Test with pytest
+        if: runner.os != 'windows'  # non-Windows runners only
+        run: |
+          pytest --verbose
+
+      - name: (windows) Install dependencies  # NOTE(review): installs without the [test] extra used by the ubuntu step - confirm intentional
+        if: runner.os == 'windows'  # Windows runners only (comparison ignores case, so this matches "Windows")
+        shell: bash  # use bash rather than the Windows runner's default shell
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements_dev.txt
+          python -m spacy download en_core_web_sm
+          pip install -e .
+      - name: (windows) Test with pytest
+        if: runner.os == 'windows'  # Windows runners only
+        shell: bash  # use bash rather than the Windows runner's default shell
         run: |
           pytest --verbose
diff --git a/README.md b/README.md
index 17635247..f2b055a1 100644
--- a/README.md
+++ b/README.md
@@ -29,16 +29,9 @@ You can use pip to install the library:
 pip install ojd-daps-skills
 ```
 
-You will also need to download [spaCy's](https://spacy.io/models/en) `en_core_web_sm` model:
+Note that this package was developed on macOS and tested on Ubuntu. Changes have been made to make it compatible with Windows systems, but these are not tested and cannot be guaranteed.
 
-```
-python -m spacy download en_core_web_sm
-```
-
-Note that this package was developed on MacOS and tested on Ubuntu. Changes have been made to be compatible on a Windows system but are not tested and cannot be guaranteed. 
-### AWS CLI
-
-When the package is first used it will automatically download a folder of neccessary data and models. This file is ~ 1GB. Although you don't need to have AWS credentials for this to work, you will need to download the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html).
+When the package is first used it will automatically download a folder of necessary data and models (~1GB).
 
 ## TL;DR: Using Nesta's Skills Extractor library <a name="usage"></a>
 
@@ -152,10 +145,6 @@ git clone git@github.com:nestauk/ojd_daps_skills.git
   - `mkdir .cookiecutter/state`
   - `touch .cookiecutter/state/conda-create.log`
 - Run `make install` to configure the development environment
-- Download spacy model:
-  - `python -m spacy download en_core_web_sm`
-
-If you don't have the AWS CLI installed - you can download a zipped folder of the data [by clicking here](https://open-jobs-indicators.s3.eu-west-1.amazonaws.com/escoe_extension/ojd_daps_skills_data.zip). After downloading and unzipping, it is important that this folder is moved to the project's parent folder - i.e. `ojd_daps_skills/`.
 
 ### Project structure
 
diff --git a/docs/build/doctrees/about.doctree b/docs/build/doctrees/about.doctree
index 55411246..cf681ec1 100644
Binary files a/docs/build/doctrees/about.doctree and b/docs/build/doctrees/about.doctree differ
diff --git a/docs/build/doctrees/custom_usage.doctree b/docs/build/doctrees/custom_usage.doctree
index bf0602a2..f0b13a47 100644
Binary files a/docs/build/doctrees/custom_usage.doctree and b/docs/build/doctrees/custom_usage.doctree differ
diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
index 230395d8..8dc0b74c 100644
Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ
diff --git a/docs/build/doctrees/extract_skills.doctree b/docs/build/doctrees/extract_skills.doctree
index e391247d..79340b72 100644
Binary files a/docs/build/doctrees/extract_skills.doctree and b/docs/build/doctrees/extract_skills.doctree differ
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
index 2817368f..a64fc3b7 100644
Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ
diff --git a/docs/build/doctrees/labelling.doctree b/docs/build/doctrees/labelling.doctree
index 0c44bbde..989b3f01 100644
Binary files a/docs/build/doctrees/labelling.doctree and b/docs/build/doctrees/labelling.doctree differ
diff --git a/docs/build/doctrees/license.doctree b/docs/build/doctrees/license.doctree
index 50004c37..104b29ed 100644
Binary files a/docs/build/doctrees/license.doctree and b/docs/build/doctrees/license.doctree differ
diff --git a/docs/build/doctrees/model_card.doctree b/docs/build/doctrees/model_card.doctree
index ccce3340..22f5fca3 100644
Binary files a/docs/build/doctrees/model_card.doctree and b/docs/build/doctrees/model_card.doctree differ
diff --git a/docs/build/doctrees/pipeline_summary.doctree b/docs/build/doctrees/pipeline_summary.doctree
index 84f1a440..0158cb91 100644
Binary files a/docs/build/doctrees/pipeline_summary.doctree and b/docs/build/doctrees/pipeline_summary.doctree differ
diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css
index eeb0519a..1c79f9b4 100644
--- a/docs/build/html/_static/basic.css
+++ b/docs/build/html/_static/basic.css
@@ -12,241 +12,233 @@
 /* -- main layout ----------------------------------------------------------- */
 
 div.clearer {
-    clear: both;
+  clear: both;
 }
 
 div.section::after {
-    display: block;
-    content: '';
-    clear: left;
+  display: block;
+  content: "";
+  clear: left;
 }
 
 /* -- relbar ---------------------------------------------------------------- */
 
 div.related {
-    width: 100%;
-    font-size: 90%;
+  width: 100%;
+  font-size: 90%;
 }
 
 div.related h3 {
-    display: none;
+  display: none;
 }
 
 div.related ul {
-    margin: 0;
-    padding: 0 0 0 10px;
-    list-style: none;
+  margin: 0;
+  padding: 0 0 0 10px;
+  list-style: none;
 }
 
 div.related li {
-    display: inline;
+  display: inline;
 }
 
 div.related li.right {
-    float: right;
-    margin-right: 5px;
+  float: right;
+  margin-right: 5px;
 }
 
 /* -- sidebar --------------------------------------------------------------- */
 
 div.sphinxsidebarwrapper {
-    padding: 10px 5px 0 10px;
+  padding: 10px 5px 0 10px;
 }
 
 div.sphinxsidebar {
-    float: left;
-    width: 230px;
-    margin-left: -100%;
-    font-size: 90%;
-    word-wrap: break-word;
-    overflow-wrap : break-word;
+  float: left;
+  width: 230px;
+  margin-left: -100%;
+  font-size: 90%;
+  word-wrap: break-word;
+  overflow-wrap: break-word;
 }
 
 div.sphinxsidebar ul {
-    list-style: none;
+  list-style: none;
 }
 
 div.sphinxsidebar ul ul,
 div.sphinxsidebar ul.want-points {
-    margin-left: 20px;
-    list-style: square;
+  margin-left: 20px;
+  list-style: square;
 }
 
 div.sphinxsidebar ul ul {
-    margin-top: 0;
-    margin-bottom: 0;
+  margin-top: 0;
+  margin-bottom: 0;
 }
 
 div.sphinxsidebar form {
-    margin-top: 10px;
+  margin-top: 10px;
 }
 
 div.sphinxsidebar input {
-    border: 1px solid #98dbcc;
-    font-family: sans-serif;
-    font-size: 1em;
+  border: 1px solid #98dbcc;
+  font-family: sans-serif;
+  font-size: 1em;
 }
 
 div.sphinxsidebar #searchbox form.search {
-    overflow: hidden;
+  overflow: hidden;
 }
 
 div.sphinxsidebar #searchbox input[type="text"] {
-    float: left;
-    width: 80%;
-    padding: 0.25em;
-    box-sizing: border-box;
+  float: left;
+  width: 80%;
+  padding: 0.25em;
+  box-sizing: border-box;
 }
 
 div.sphinxsidebar #searchbox input[type="submit"] {
-    float: left;
-    width: 20%;
-    border-left: none;
-    padding: 0.25em;
-    box-sizing: border-box;
+  float: left;
+  width: 20%;
+  border-left: none;
+  padding: 0.25em;
+  box-sizing: border-box;
 }
 
-
 img {
-    border: 0;
-    max-width: 100%;
+  border: 0;
+  max-width: 100%;
 }
 
 /* -- search page ----------------------------------------------------------- */
 
 ul.search {
-    margin: 10px 0 0 20px;
-    padding: 0;
+  margin: 10px 0 0 20px;
+  padding: 0;
 }
 
 ul.search li {
-    padding: 5px 0 5px 20px;
-    background-image: url(file.png);
-    background-repeat: no-repeat;
-    background-position: 0 7px;
+  padding: 5px 0 5px 20px;
+  background-image: url(file.png);
+  background-repeat: no-repeat;
+  background-position: 0 7px;
 }
 
 ul.search li a {
-    font-weight: bold;
+  font-weight: bold;
 }
 
 ul.search li p.context {
-    color: #888;
-    margin: 2px 0 0 30px;
-    text-align: left;
+  color: #888;
+  margin: 2px 0 0 30px;
+  text-align: left;
 }
 
 ul.keywordmatches li.goodmatch a {
-    font-weight: bold;
+  font-weight: bold;
 }
 
 /* -- index page ------------------------------------------------------------ */
 
 table.contentstable {
-    width: 90%;
-    margin-left: auto;
-    margin-right: auto;
+  width: 90%;
+  margin-left: auto;
+  margin-right: auto;
 }
 
 table.contentstable p.biglink {
-    line-height: 150%;
+  line-height: 150%;
 }
 
 a.biglink {
-    font-size: 1.3em;
+  font-size: 1.3em;
 }
 
 span.linkdescr {
-    font-style: italic;
-    padding-top: 5px;
-    font-size: 90%;
+  font-style: italic;
+  padding-top: 5px;
+  font-size: 90%;
 }
 
 /* -- general index --------------------------------------------------------- */
 
 table.indextable {
-    width: 100%;
+  width: 100%;
 }
 
 table.indextable td {
-    text-align: left;
-    vertical-align: top;
+  text-align: left;
+  vertical-align: top;
 }
 
 table.indextable ul {
-    margin-top: 0;
-    margin-bottom: 0;
-    list-style-type: none;
+  margin-top: 0;
+  margin-bottom: 0;
+  list-style-type: none;
 }
 
 table.indextable > tbody > tr > td > ul {
-    padding-left: 0em;
+  padding-left: 0em;
 }
 
 table.indextable tr.pcap {
-    height: 10px;
+  height: 10px;
 }
 
 table.indextable tr.cap {
-    margin-top: 10px;
-    background-color: #f2f2f2;
+  margin-top: 10px;
+  background-color: #f2f2f2;
 }
 
 img.toggler {
-    margin-right: 3px;
-    margin-top: 3px;
-    cursor: pointer;
+  margin-right: 3px;
+  margin-top: 3px;
+  cursor: pointer;
 }
 
 div.modindex-jumpbox {
-    border-top: 1px solid #ddd;
-    border-bottom: 1px solid #ddd;
-    margin: 1em 0 1em 0;
-    padding: 0.4em;
+  border-top: 1px solid #ddd;
+  border-bottom: 1px solid #ddd;
+  margin: 1em 0 1em 0;
+  padding: 0.4em;
 }
 
 div.genindex-jumpbox {
-    border-top: 1px solid #ddd;
-    border-bottom: 1px solid #ddd;
-    margin: 1em 0 1em 0;
-    padding: 0.4em;
+  border-top: 1px solid #ddd;
+  border-bottom: 1px solid #ddd;
+  margin: 1em 0 1em 0;
+  padding: 0.4em;
 }
 
 /* -- domain module index --------------------------------------------------- */
 
 table.modindextable td {
-    padding: 2px;
-    border-collapse: collapse;
+  padding: 2px;
+  border-collapse: collapse;
 }
 
 /* -- general body styles --------------------------------------------------- */
 
 div.body {
-    min-width: 360px;
-    max-width: 800px;
+  min-width: 360px;
+  max-width: 800px;
 }
 
-div.body p, div.body dd, div.body li, div.body blockquote {
-    -moz-hyphens: auto;
-    -ms-hyphens: auto;
-    -webkit-hyphens: auto;
-    hyphens: auto;
+div.body p,
+div.body dd,
+div.body li,
+div.body blockquote {
+  -moz-hyphens: auto;
+  -ms-hyphens: auto;
+  -webkit-hyphens: auto;
+  hyphens: auto;
 }
 
 a.headerlink {
-    visibility: hidden;
-}
-a.brackets:before,
-span.brackets > a:before{
-    content: "[";
+  visibility: hidden;
 }
 
-a.brackets:after,
-span.brackets > a:after {
-    content: "]";
-}
-
-
 h1:hover > a.headerlink,
 h2:hover > a.headerlink,
 h3:hover > a.headerlink,
@@ -257,213 +249,237 @@ dt:hover > a.headerlink,
 caption:hover > a.headerlink,
 p.caption:hover > a.headerlink,
 div.code-block-caption:hover > a.headerlink {
-    visibility: visible;
+  visibility: visible;
 }
 
 div.body p.caption {
-    text-align: inherit;
+  text-align: inherit;
 }
 
 div.body td {
-    text-align: left;
+  text-align: left;
 }
 
 .first {
-    margin-top: 0 !important;
+  margin-top: 0 !important;
 }
 
 p.rubric {
-    margin-top: 30px;
-    font-weight: bold;
+  margin-top: 30px;
+  font-weight: bold;
 }
 
-img.align-left, figure.align-left, .figure.align-left, object.align-left {
-    clear: left;
-    float: left;
-    margin-right: 1em;
+img.align-left,
+figure.align-left,
+.figure.align-left,
+object.align-left {
+  clear: left;
+  float: left;
+  margin-right: 1em;
 }
 
-img.align-right, figure.align-right, .figure.align-right, object.align-right {
-    clear: right;
-    float: right;
-    margin-left: 1em;
+img.align-right,
+figure.align-right,
+.figure.align-right,
+object.align-right {
+  clear: right;
+  float: right;
+  margin-left: 1em;
 }
 
-img.align-center, figure.align-center, .figure.align-center, object.align-center {
+img.align-center,
+figure.align-center,
+.figure.align-center,
+object.align-center {
   display: block;
   margin-left: auto;
   margin-right: auto;
 }
 
-img.align-default, figure.align-default, .figure.align-default {
+img.align-default,
+figure.align-default,
+.figure.align-default {
   display: block;
   margin-left: auto;
   margin-right: auto;
 }
 
 .align-left {
-    text-align: left;
+  text-align: left;
 }
 
 .align-center {
-    text-align: center;
+  text-align: center;
 }
 
 .align-default {
-    text-align: center;
+  text-align: center;
 }
 
 .align-right {
-    text-align: right;
+  text-align: right;
 }
 
 /* -- sidebars -------------------------------------------------------------- */
 
 div.sidebar,
 aside.sidebar {
-    margin: 0 0 0.5em 1em;
-    border: 1px solid #ddb;
-    padding: 7px;
-    background-color: #ffe;
-    width: 40%;
-    float: right;
-    clear: right;
-    overflow-x: auto;
+  margin: 0 0 0.5em 1em;
+  border: 1px solid #ddb;
+  padding: 7px;
+  background-color: #ffe;
+  width: 40%;
+  float: right;
+  clear: right;
+  overflow-x: auto;
 }
 
 p.sidebar-title {
-    font-weight: bold;
+  font-weight: bold;
 }
-div.admonition, div.topic, blockquote {
-    clear: left;
+nav.contents,
+aside.topic,
+div.admonition,
+div.topic,
+blockquote {
+  clear: left;
 }
 
 /* -- topics ---------------------------------------------------------------- */
+nav.contents,
+aside.topic,
 div.topic {
-    border: 1px solid #ccc;
-    padding: 7px;
-    margin: 10px 0 10px 0;
+  border: 1px solid #ccc;
+  padding: 7px;
+  margin: 10px 0 10px 0;
 }
 
 p.topic-title {
-    font-size: 1.1em;
-    font-weight: bold;
-    margin-top: 10px;
+  font-size: 1.1em;
+  font-weight: bold;
+  margin-top: 10px;
 }
 
 /* -- admonitions ----------------------------------------------------------- */
 
 div.admonition {
-    margin-top: 10px;
-    margin-bottom: 10px;
-    padding: 7px;
+  margin-top: 10px;
+  margin-bottom: 10px;
+  padding: 7px;
 }
 
 div.admonition dt {
-    font-weight: bold;
+  font-weight: bold;
 }
 
 p.admonition-title {
-    margin: 0px 10px 5px 0px;
-    font-weight: bold;
+  margin: 0px 10px 5px 0px;
+  font-weight: bold;
 }
 
 div.body p.centered {
-    text-align: center;
-    margin-top: 25px;
+  text-align: center;
+  margin-top: 25px;
 }
 
 /* -- content of sidebars/topics/admonitions -------------------------------- */
 
 div.sidebar > :last-child,
 aside.sidebar > :last-child,
+nav.contents > :last-child,
+aside.topic > :last-child,
 div.topic > :last-child,
 div.admonition > :last-child {
-    margin-bottom: 0;
+  margin-bottom: 0;
 }
 
 div.sidebar::after,
 aside.sidebar::after,
+nav.contents::after,
+aside.topic::after,
 div.topic::after,
 div.admonition::after,
 blockquote::after {
-    display: block;
-    content: '';
-    clear: both;
+  display: block;
+  content: "";
+  clear: both;
 }
 
 /* -- tables ---------------------------------------------------------------- */
 
 table.docutils {
-    margin-top: 10px;
-    margin-bottom: 10px;
-    border: 0;
-    border-collapse: collapse;
+  margin-top: 10px;
+  margin-bottom: 10px;
+  border: 0;
+  border-collapse: collapse;
 }
 
 table.align-center {
-    margin-left: auto;
-    margin-right: auto;
+  margin-left: auto;
+  margin-right: auto;
 }
 
 table.align-default {
-    margin-left: auto;
-    margin-right: auto;
+  margin-left: auto;
+  margin-right: auto;
 }
 
 table caption span.caption-number {
-    font-style: italic;
+  font-style: italic;
 }
 
 table caption span.caption-text {
 }
 
-table.docutils td, table.docutils th {
-    padding: 1px 8px 1px 5px;
-    border-top: 0;
-    border-left: 0;
-    border-right: 0;
-    border-bottom: 1px solid #aaa;
+table.docutils td,
+table.docutils th {
+  padding: 1px 8px 1px 5px;
+  border-top: 0;
+  border-left: 0;
+  border-right: 0;
+  border-bottom: 1px solid #aaa;
 }
 
 th {
-    text-align: left;
-    padding-right: 5px;
+  text-align: left;
+  padding-right: 5px;
 }
 
 table.citation {
-    border-left: solid 1px gray;
-    margin-left: 1px;
+  border-left: solid 1px gray;
+  margin-left: 1px;
 }
 
 table.citation td {
-    border-bottom: none;
+  border-bottom: none;
 }
 
 th > :first-child,
 td > :first-child {
-    margin-top: 0px;
+  margin-top: 0px;
 }
 
 th > :last-child,
 td > :last-child {
-    margin-bottom: 0px;
+  margin-bottom: 0px;
 }
 
 /* -- figures --------------------------------------------------------------- */
 
-div.figure, figure {
-    margin: 0.5em;
-    padding: 0.5em;
+div.figure,
+figure {
+  margin: 0.5em;
+  padding: 0.5em;
 }
 
-div.figure p.caption, figcaption {
-    padding: 0.3em;
+div.figure p.caption,
+figcaption {
+  padding: 0.3em;
 }
 
 div.figure p.caption span.caption-number,
 figcaption span.caption-number {
-    font-style: italic;
+  font-style: italic;
 }
 
 div.figure p.caption span.caption-text,
@@ -472,349 +488,365 @@ figcaption span.caption-text {
 
 /* -- field list styles ----------------------------------------------------- */
 
-table.field-list td, table.field-list th {
-    border: 0 !important;
+table.field-list td,
+table.field-list th {
+  border: 0 !important;
 }
 
 .field-list ul {
-    margin: 0;
-    padding-left: 1em;
+  margin: 0;
+  padding-left: 1em;
 }
 
 .field-list p {
-    margin: 0;
+  margin: 0;
 }
 
 .field-name {
-    -moz-hyphens: manual;
-    -ms-hyphens: manual;
-    -webkit-hyphens: manual;
-    hyphens: manual;
+  -moz-hyphens: manual;
+  -ms-hyphens: manual;
+  -webkit-hyphens: manual;
+  hyphens: manual;
 }
 
 /* -- hlist styles ---------------------------------------------------------- */
 
 table.hlist {
-    margin: 1em 0;
+  margin: 1em 0;
 }
 
 table.hlist td {
-    vertical-align: top;
+  vertical-align: top;
 }
 
 /* -- object description styles --------------------------------------------- */
 
 .sig {
-	font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
+  font-family: "Consolas", "Menlo", "DejaVu Sans Mono",
+    "Bitstream Vera Sans Mono", monospace;
 }
 
-.sig-name, code.descname {
-    background-color: transparent;
-    font-weight: bold;
+.sig-name,
+code.descname {
+  background-color: transparent;
+  font-weight: bold;
 }
 
 .sig-name {
-	font-size: 1.1em;
+  font-size: 1.1em;
 }
 
 code.descname {
-    font-size: 1.2em;
+  font-size: 1.2em;
 }
 
-.sig-prename, code.descclassname {
-    background-color: transparent;
+.sig-prename,
+code.descclassname {
+  background-color: transparent;
 }
 
 .optional {
-    font-size: 1.3em;
+  font-size: 1.3em;
 }
 
 .sig-paren {
-    font-size: larger;
+  font-size: larger;
 }
 
 .sig-param.n {
-	font-style: italic;
+  font-style: italic;
 }
 
 /* C++ specific styling */
 
 .sig-inline.c-texpr,
 .sig-inline.cpp-texpr {
-	font-family: unset;
+  font-family: unset;
 }
 
-.sig.c   .k, .sig.c   .kt,
-.sig.cpp .k, .sig.cpp .kt {
-	color: #0033B3;
+.sig.c .k,
+.sig.c .kt,
+.sig.cpp .k,
+.sig.cpp .kt {
+  color: #0033b3;
 }
 
-.sig.c   .m,
+.sig.c .m,
 .sig.cpp .m {
-	color: #1750EB;
+  color: #1750eb;
 }
 
-.sig.c   .s, .sig.c   .sc,
-.sig.cpp .s, .sig.cpp .sc {
-	color: #067D17;
+.sig.c .s,
+.sig.c .sc,
+.sig.cpp .s,
+.sig.cpp .sc {
+  color: #067d17;
 }
 
-
 /* -- other body styles ----------------------------------------------------- */
 
 ol.arabic {
-    list-style: decimal;
+  list-style: decimal;
 }
 
 ol.loweralpha {
-    list-style: lower-alpha;
+  list-style: lower-alpha;
 }
 
 ol.upperalpha {
-    list-style: upper-alpha;
+  list-style: upper-alpha;
 }
 
 ol.lowerroman {
-    list-style: lower-roman;
+  list-style: lower-roman;
 }
 
 ol.upperroman {
-    list-style: upper-roman;
+  list-style: upper-roman;
 }
 
 :not(li) > ol > li:first-child > :first-child,
 :not(li) > ul > li:first-child > :first-child {
-    margin-top: 0px;
+  margin-top: 0px;
 }
 
 :not(li) > ol > li:last-child > :last-child,
 :not(li) > ul > li:last-child > :last-child {
-    margin-bottom: 0px;
+  margin-bottom: 0px;
 }
 
 ol.simple ol p,
 ol.simple ul p,
 ul.simple ol p,
 ul.simple ul p {
-    margin-top: 0;
+  margin-top: 0;
 }
 
 ol.simple > li:not(:first-child) > p,
 ul.simple > li:not(:first-child) > p {
-    margin-top: 0;
+  margin-top: 0;
 }
 
 ol.simple p,
 ul.simple p {
-    margin-bottom: 0;
+  margin-bottom: 0;
 }
-dl.footnote > dt,
-dl.citation > dt {
-    float: left;
-    margin-right: 0.5em;
+aside.footnote > span,
+div.citation > span {
+  float: left;
 }
-
-dl.footnote > dd,
-dl.citation > dd {
-    margin-bottom: 0em;
+aside.footnote > span:last-of-type,
+div.citation > span:last-of-type {
+  padding-right: 0.5em;
 }
-
-dl.footnote > dd:after,
-dl.citation > dd:after {
-    content: "";
-    clear: both;
+aside.footnote > p {
+  margin-left: 2em;
+}
+div.citation > p {
+  margin-left: 4em;
+}
+aside.footnote > p:last-of-type,
+div.citation > p:last-of-type {
+  margin-bottom: 0em;
+}
+aside.footnote > p:last-of-type:after,
+div.citation > p:last-of-type:after {
+  content: "";
+  clear: both;
 }
 
 dl.field-list {
-    display: grid;
-    grid-template-columns: fit-content(30%) auto;
+  display: grid;
+  grid-template-columns: fit-content(30%) auto;
 }
 
 dl.field-list > dt {
-    font-weight: bold;
-    word-break: break-word;
-    padding-left: 0.5em;
-    padding-right: 5px;
-}
-dl.field-list > dt:after {
-    content: ":";
+  font-weight: bold;
+  word-break: break-word;
+  padding-left: 0.5em;
+  padding-right: 5px;
 }
 
-
 dl.field-list > dd {
-    padding-left: 0.5em;
-    margin-top: 0em;
-    margin-left: 0em;
-    margin-bottom: 0em;
+  padding-left: 0.5em;
+  margin-top: 0em;
+  margin-left: 0em;
+  margin-bottom: 0em;
 }
 
 dl {
-    margin-bottom: 15px;
+  margin-bottom: 15px;
 }
 
 dd > :first-child {
-    margin-top: 0px;
+  margin-top: 0px;
 }
 
-dd ul, dd table {
-    margin-bottom: 10px;
+dd ul,
+dd table {
+  margin-bottom: 10px;
 }
 
 dd {
-    margin-top: 3px;
-    margin-bottom: 10px;
-    margin-left: 30px;
+  margin-top: 3px;
+  margin-bottom: 10px;
+  margin-left: 30px;
 }
 
 dl > dd:last-child,
 dl > dd:last-child > :last-child {
-    margin-bottom: 0;
+  margin-bottom: 0;
 }
 
-dt:target, span.highlighted {
-    background-color: #fbe54e;
+dt:target,
+span.highlighted {
+  background-color: #fbe54e;
 }
 
 rect.highlighted {
-    fill: #fbe54e;
+  fill: #fbe54e;
 }
 
 dl.glossary dt {
-    font-weight: bold;
-    font-size: 1.1em;
+  font-weight: bold;
+  font-size: 1.1em;
 }
 
 .versionmodified {
-    font-style: italic;
+  font-style: italic;
 }
 
 .system-message {
-    background-color: #fda;
-    padding: 5px;
-    border: 3px solid red;
+  background-color: #fda;
+  padding: 5px;
+  border: 3px solid red;
 }
 
-.footnote:target  {
-    background-color: #ffa;
+.footnote:target {
+  background-color: #ffa;
 }
 
 .line-block {
-    display: block;
-    margin-top: 1em;
-    margin-bottom: 1em;
+  display: block;
+  margin-top: 1em;
+  margin-bottom: 1em;
 }
 
 .line-block .line-block {
-    margin-top: 0;
-    margin-bottom: 0;
-    margin-left: 1.5em;
+  margin-top: 0;
+  margin-bottom: 0;
+  margin-left: 1.5em;
 }
 
-.guilabel, .menuselection {
-    font-family: sans-serif;
+.guilabel,
+.menuselection {
+  font-family: sans-serif;
 }
 
 .accelerator {
-    text-decoration: underline;
+  text-decoration: underline;
 }
 
 .classifier {
-    font-style: oblique;
+  font-style: oblique;
 }
 
 .classifier:before {
-    font-style: normal;
-    margin: 0 0.5em;
-    content: ":";
-    display: inline-block;
+  font-style: normal;
+  margin: 0 0.5em;
+  content: ":";
+  display: inline-block;
 }
 
-abbr, acronym {
-    border-bottom: dotted 1px;
-    cursor: help;
+abbr,
+acronym {
+  border-bottom: dotted 1px;
+  cursor: help;
 }
 
 /* -- code displays --------------------------------------------------------- */
 
 pre {
-    overflow: auto;
-    overflow-y: hidden;  /* fixes display issues on Chrome browsers */
+  overflow: auto;
+  overflow-y: hidden; /* fixes display issues on Chrome browsers */
 }
 
-pre, div[class*="highlight-"] {
-    clear: both;
+pre,
+div[class*="highlight-"] {
+  clear: both;
 }
 
 span.pre {
-    -moz-hyphens: none;
-    -ms-hyphens: none;
-    -webkit-hyphens: none;
-    hyphens: none;
-    white-space: nowrap;
+  -moz-hyphens: none;
+  -ms-hyphens: none;
+  -webkit-hyphens: none;
+  hyphens: none;
+  white-space: nowrap;
 }
 
 div[class*="highlight-"] {
-    margin: 1em 0;
+  margin: 1em 0;
 }
 
 td.linenos pre {
-    border: 0;
-    background-color: transparent;
-    color: #aaa;
+  border: 0;
+  background-color: transparent;
+  color: #aaa;
 }
 
 table.highlighttable {
-    display: block;
+  display: block;
 }
 
 table.highlighttable tbody {
-    display: block;
+  display: block;
 }
 
 table.highlighttable tr {
-    display: flex;
+  display: flex;
 }
 
 table.highlighttable td {
-    margin: 0;
-    padding: 0;
+  margin: 0;
+  padding: 0;
 }
 
 table.highlighttable td.linenos {
-    padding-right: 0.5em;
+  padding-right: 0.5em;
 }
 
 table.highlighttable td.code {
-    flex: 1;
-    overflow: hidden;
+  flex: 1;
+  overflow: hidden;
 }
 
 .highlight .hll {
-    display: block;
+  display: block;
 }
 
 div.highlight pre,
 table.highlighttable pre {
-    margin: 0;
+  margin: 0;
 }
 
 div.code-block-caption + div {
-    margin-top: 0;
+  margin-top: 0;
 }
 
 div.code-block-caption {
-    margin-top: 1em;
-    padding: 2px 5px;
-    font-size: small;
+  margin-top: 1em;
+  padding: 2px 5px;
+  font-size: small;
 }
 
 div.code-block-caption code {
-    background-color: transparent;
+  background-color: transparent;
 }
 
 table.highlighttable td.linenos,
 span.linenos,
-div.highlight span.gp {  /* gp: Generic.Prompt */
+div.highlight span.gp {
+  /* gp: Generic.Prompt */
   user-select: none;
   -webkit-user-select: text; /* Safari fallback only */
   -webkit-user-select: none; /* Chrome/Safari */
@@ -823,77 +855,83 @@ div.highlight span.gp {  /* gp: Generic.Prompt */
 }
 
 div.code-block-caption span.caption-number {
-    padding: 0.1em 0.3em;
-    font-style: italic;
+  padding: 0.1em 0.3em;
+  font-style: italic;
 }
 
 div.code-block-caption span.caption-text {
 }
 
 div.literal-block-wrapper {
-    margin: 1em 0;
+  margin: 1em 0;
 }
 
-code.xref, a code {
-    background-color: transparent;
-    font-weight: bold;
+code.xref,
+a code {
+  background-color: transparent;
+  font-weight: bold;
 }
 
-h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
-    background-color: transparent;
+h1 code,
+h2 code,
+h3 code,
+h4 code,
+h5 code,
+h6 code {
+  background-color: transparent;
 }
 
 .viewcode-link {
-    float: right;
+  float: right;
 }
 
 .viewcode-back {
-    float: right;
-    font-family: sans-serif;
+  float: right;
+  font-family: sans-serif;
 }
 
 div.viewcode-block:target {
-    margin: -1px -10px;
-    padding: 0 10px;
+  margin: -1px -10px;
+  padding: 0 10px;
 }
 
 /* -- math display ---------------------------------------------------------- */
 
 img.math {
-    vertical-align: middle;
+  vertical-align: middle;
 }
 
 div.body div.math p {
-    text-align: center;
+  text-align: center;
 }
 
 span.eqno {
-    float: right;
+  float: right;
 }
 
 span.eqno a.headerlink {
-    position: absolute;
-    z-index: 1;
+  position: absolute;
+  z-index: 1;
 }
 
 div.math:hover a.headerlink {
-    visibility: visible;
+  visibility: visible;
 }
 
 /* -- printout stylesheet --------------------------------------------------- */
 
 @media print {
-    div.document,
-    div.documentwrapper,
-    div.bodywrapper {
-        margin: 0 !important;
-        width: 100%;
-    }
-
-    div.sphinxsidebar,
-    div.related,
-    div.footer,
-    #top-link {
-        display: none;
-    }
-}
\ No newline at end of file
+  div.document,
+  div.documentwrapper,
+  div.bodywrapper {
+    margin: 0 !important;
+    width: 100%;
+  }
+
+  div.sphinxsidebar,
+  div.related,
+  div.footer,
+  #top-link {
+    display: none;
+  }
+}
diff --git a/docs/build/html/_static/pygments.css b/docs/build/html/_static/pygments.css
index 75471509..e7e91a11 100644
--- a/docs/build/html/_static/pygments.css
+++ b/docs/build/html/_static/pygments.css
@@ -1,255 +1,892 @@
-.highlight pre { line-height: 125%; }
-.highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-.highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-.highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-.highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-.highlight .hll { background-color: #ffffcc }
-.highlight { background: #f8f8f8; }
-.highlight .c { color: #8f5902; font-style: italic } /* Comment */
-.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */
-.highlight .g { color: #000000 } /* Generic */
-.highlight .k { color: #204a87; font-weight: bold } /* Keyword */
-.highlight .l { color: #000000 } /* Literal */
-.highlight .n { color: #000000 } /* Name */
-.highlight .o { color: #ce5c00; font-weight: bold } /* Operator */
-.highlight .x { color: #000000 } /* Other */
-.highlight .p { color: #000000; font-weight: bold } /* Punctuation */
-.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */
-.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */
-.highlight .cp { color: #8f5902; font-style: italic } /* Comment.Preproc */
-.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */
-.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */
-.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */
-.highlight .gd { color: #a40000 } /* Generic.Deleted */
-.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */
-.highlight .gr { color: #ef2929 } /* Generic.Error */
-.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
-.highlight .gi { color: #00A000 } /* Generic.Inserted */
-.highlight .go { color: #000000; font-style: italic } /* Generic.Output */
-.highlight .gp { color: #8f5902 } /* Generic.Prompt */
-.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */
-.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
-.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */
-.highlight .kc { color: #204a87; font-weight: bold } /* Keyword.Constant */
-.highlight .kd { color: #204a87; font-weight: bold } /* Keyword.Declaration */
-.highlight .kn { color: #204a87; font-weight: bold } /* Keyword.Namespace */
-.highlight .kp { color: #204a87; font-weight: bold } /* Keyword.Pseudo */
-.highlight .kr { color: #204a87; font-weight: bold } /* Keyword.Reserved */
-.highlight .kt { color: #204a87; font-weight: bold } /* Keyword.Type */
-.highlight .ld { color: #000000 } /* Literal.Date */
-.highlight .m { color: #0000cf; font-weight: bold } /* Literal.Number */
-.highlight .s { color: #4e9a06 } /* Literal.String */
-.highlight .na { color: #c4a000 } /* Name.Attribute */
-.highlight .nb { color: #204a87 } /* Name.Builtin */
-.highlight .nc { color: #000000 } /* Name.Class */
-.highlight .no { color: #000000 } /* Name.Constant */
-.highlight .nd { color: #5c35cc; font-weight: bold } /* Name.Decorator */
-.highlight .ni { color: #ce5c00 } /* Name.Entity */
-.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */
-.highlight .nf { color: #000000 } /* Name.Function */
-.highlight .nl { color: #f57900 } /* Name.Label */
-.highlight .nn { color: #000000 } /* Name.Namespace */
-.highlight .nx { color: #000000 } /* Name.Other */
-.highlight .py { color: #000000 } /* Name.Property */
-.highlight .nt { color: #204a87; font-weight: bold } /* Name.Tag */
-.highlight .nv { color: #000000 } /* Name.Variable */
-.highlight .ow { color: #204a87; font-weight: bold } /* Operator.Word */
-.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */
-.highlight .w { color: #f8f8f8 } /* Text.Whitespace */
-.highlight .mb { color: #0000cf; font-weight: bold } /* Literal.Number.Bin */
-.highlight .mf { color: #0000cf; font-weight: bold } /* Literal.Number.Float */
-.highlight .mh { color: #0000cf; font-weight: bold } /* Literal.Number.Hex */
-.highlight .mi { color: #0000cf; font-weight: bold } /* Literal.Number.Integer */
-.highlight .mo { color: #0000cf; font-weight: bold } /* Literal.Number.Oct */
-.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */
-.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */
-.highlight .sc { color: #4e9a06 } /* Literal.String.Char */
-.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */
-.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */
-.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */
-.highlight .se { color: #4e9a06 } /* Literal.String.Escape */
-.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */
-.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */
-.highlight .sx { color: #4e9a06 } /* Literal.String.Other */
-.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */
-.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */
-.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */
-.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #000000 } /* Name.Function.Magic */
-.highlight .vc { color: #000000 } /* Name.Variable.Class */
-.highlight .vg { color: #000000 } /* Name.Variable.Global */
-.highlight .vi { color: #000000 } /* Name.Variable.Instance */
-.highlight .vm { color: #000000 } /* Name.Variable.Magic */
-.highlight .il { color: #0000cf; font-weight: bold } /* Literal.Number.Integer.Long */
+.highlight pre {
+  line-height: 125%;
+}
+.highlight td.linenos .normal {
+  color: inherit;
+  background-color: transparent;
+  padding-left: 5px;
+  padding-right: 5px;
+}
+.highlight span.linenos {
+  color: inherit;
+  background-color: transparent;
+  padding-left: 5px;
+  padding-right: 5px;
+}
+.highlight td.linenos .special {
+  color: #000000;
+  background-color: #ffffc0;
+  padding-left: 5px;
+  padding-right: 5px;
+}
+.highlight span.linenos.special {
+  color: #000000;
+  background-color: #ffffc0;
+  padding-left: 5px;
+  padding-right: 5px;
+}
+.highlight .hll {
+  background-color: #ffffcc;
+}
+.highlight {
+  background: #f8f8f8;
+}
+.highlight .c {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment */
+.highlight .err {
+  color: #a40000;
+  border: 1px solid #ef2929;
+} /* Error */
+.highlight .g {
+  color: #000000;
+} /* Generic */
+.highlight .k {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword */
+.highlight .l {
+  color: #000000;
+} /* Literal */
+.highlight .n {
+  color: #000000;
+} /* Name */
+.highlight .o {
+  color: #ce5c00;
+  font-weight: bold;
+} /* Operator */
+.highlight .x {
+  color: #000000;
+} /* Other */
+.highlight .p {
+  color: #000000;
+  font-weight: bold;
+} /* Punctuation */
+.highlight .ch {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.Hashbang */
+.highlight .cm {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.Multiline */
+.highlight .cp {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.Preproc */
+.highlight .cpf {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.PreprocFile */
+.highlight .c1 {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.Single */
+.highlight .cs {
+  color: #8f5902;
+  font-style: italic;
+} /* Comment.Special */
+.highlight .gd {
+  color: #a40000;
+} /* Generic.Deleted */
+.highlight .ge {
+  color: #000000;
+  font-style: italic;
+} /* Generic.Emph */
+.highlight .ges {
+  color: #000000;
+  font-weight: bold;
+  font-style: italic;
+} /* Generic.EmphStrong */
+.highlight .gr {
+  color: #ef2929;
+} /* Generic.Error */
+.highlight .gh {
+  color: #000080;
+  font-weight: bold;
+} /* Generic.Heading */
+.highlight .gi {
+  color: #00a000;
+} /* Generic.Inserted */
+.highlight .go {
+  color: #000000;
+  font-style: italic;
+} /* Generic.Output */
+.highlight .gp {
+  color: #8f5902;
+} /* Generic.Prompt */
+.highlight .gs {
+  color: #000000;
+  font-weight: bold;
+} /* Generic.Strong */
+.highlight .gu {
+  color: #800080;
+  font-weight: bold;
+} /* Generic.Subheading */
+.highlight .gt {
+  color: #a40000;
+  font-weight: bold;
+} /* Generic.Traceback */
+.highlight .kc {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Constant */
+.highlight .kd {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Declaration */
+.highlight .kn {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Namespace */
+.highlight .kp {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Pseudo */
+.highlight .kr {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Reserved */
+.highlight .kt {
+  color: #204a87;
+  font-weight: bold;
+} /* Keyword.Type */
+.highlight .ld {
+  color: #000000;
+} /* Literal.Date */
+.highlight .m {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number */
+.highlight .s {
+  color: #4e9a06;
+} /* Literal.String */
+.highlight .na {
+  color: #c4a000;
+} /* Name.Attribute */
+.highlight .nb {
+  color: #204a87;
+} /* Name.Builtin */
+.highlight .nc {
+  color: #000000;
+} /* Name.Class */
+.highlight .no {
+  color: #000000;
+} /* Name.Constant */
+.highlight .nd {
+  color: #5c35cc;
+  font-weight: bold;
+} /* Name.Decorator */
+.highlight .ni {
+  color: #ce5c00;
+} /* Name.Entity */
+.highlight .ne {
+  color: #cc0000;
+  font-weight: bold;
+} /* Name.Exception */
+.highlight .nf {
+  color: #000000;
+} /* Name.Function */
+.highlight .nl {
+  color: #f57900;
+} /* Name.Label */
+.highlight .nn {
+  color: #000000;
+} /* Name.Namespace */
+.highlight .nx {
+  color: #000000;
+} /* Name.Other */
+.highlight .py {
+  color: #000000;
+} /* Name.Property */
+.highlight .nt {
+  color: #204a87;
+  font-weight: bold;
+} /* Name.Tag */
+.highlight .nv {
+  color: #000000;
+} /* Name.Variable */
+.highlight .ow {
+  color: #204a87;
+  font-weight: bold;
+} /* Operator.Word */
+.highlight .pm {
+  color: #000000;
+  font-weight: bold;
+} /* Punctuation.Marker */
+.highlight .w {
+  color: #f8f8f8;
+} /* Text.Whitespace */
+.highlight .mb {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Bin */
+.highlight .mf {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Float */
+.highlight .mh {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Hex */
+.highlight .mi {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Integer */
+.highlight .mo {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Oct */
+.highlight .sa {
+  color: #4e9a06;
+} /* Literal.String.Affix */
+.highlight .sb {
+  color: #4e9a06;
+} /* Literal.String.Backtick */
+.highlight .sc {
+  color: #4e9a06;
+} /* Literal.String.Char */
+.highlight .dl {
+  color: #4e9a06;
+} /* Literal.String.Delimiter */
+.highlight .sd {
+  color: #8f5902;
+  font-style: italic;
+} /* Literal.String.Doc */
+.highlight .s2 {
+  color: #4e9a06;
+} /* Literal.String.Double */
+.highlight .se {
+  color: #4e9a06;
+} /* Literal.String.Escape */
+.highlight .sh {
+  color: #4e9a06;
+} /* Literal.String.Heredoc */
+.highlight .si {
+  color: #4e9a06;
+} /* Literal.String.Interpol */
+.highlight .sx {
+  color: #4e9a06;
+} /* Literal.String.Other */
+.highlight .sr {
+  color: #4e9a06;
+} /* Literal.String.Regex */
+.highlight .s1 {
+  color: #4e9a06;
+} /* Literal.String.Single */
+.highlight .ss {
+  color: #4e9a06;
+} /* Literal.String.Symbol */
+.highlight .bp {
+  color: #3465a4;
+} /* Name.Builtin.Pseudo */
+.highlight .fm {
+  color: #000000;
+} /* Name.Function.Magic */
+.highlight .vc {
+  color: #000000;
+} /* Name.Variable.Class */
+.highlight .vg {
+  color: #000000;
+} /* Name.Variable.Global */
+.highlight .vi {
+  color: #000000;
+} /* Name.Variable.Instance */
+.highlight .vm {
+  color: #000000;
+} /* Name.Variable.Magic */
+.highlight .il {
+  color: #0000cf;
+  font-weight: bold;
+} /* Literal.Number.Integer.Long */
 @media not print {
-body[data-theme="dark"] .highlight pre { line-height: 125%; }
-body[data-theme="dark"] .highlight td.linenos .normal { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-body[data-theme="dark"] .highlight span.linenos { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-body[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-body[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-body[data-theme="dark"] .highlight .hll { background-color: #404040 }
-body[data-theme="dark"] .highlight { background: #202020; color: #d0d0d0 }
-body[data-theme="dark"] .highlight .c { color: #ababab; font-style: italic } /* Comment */
-body[data-theme="dark"] .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
-body[data-theme="dark"] .highlight .esc { color: #d0d0d0 } /* Escape */
-body[data-theme="dark"] .highlight .g { color: #d0d0d0 } /* Generic */
-body[data-theme="dark"] .highlight .k { color: #6ebf26; font-weight: bold } /* Keyword */
-body[data-theme="dark"] .highlight .l { color: #d0d0d0 } /* Literal */
-body[data-theme="dark"] .highlight .n { color: #d0d0d0 } /* Name */
-body[data-theme="dark"] .highlight .o { color: #d0d0d0 } /* Operator */
-body[data-theme="dark"] .highlight .x { color: #d0d0d0 } /* Other */
-body[data-theme="dark"] .highlight .p { color: #d0d0d0 } /* Punctuation */
-body[data-theme="dark"] .highlight .ch { color: #ababab; font-style: italic } /* Comment.Hashbang */
-body[data-theme="dark"] .highlight .cm { color: #ababab; font-style: italic } /* Comment.Multiline */
-body[data-theme="dark"] .highlight .cp { color: #cd2828; font-weight: bold } /* Comment.Preproc */
-body[data-theme="dark"] .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */
-body[data-theme="dark"] .highlight .c1 { color: #ababab; font-style: italic } /* Comment.Single */
-body[data-theme="dark"] .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */
-body[data-theme="dark"] .highlight .gd { color: #d22323 } /* Generic.Deleted */
-body[data-theme="dark"] .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */
-body[data-theme="dark"] .highlight .gr { color: #d22323 } /* Generic.Error */
-body[data-theme="dark"] .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */
-body[data-theme="dark"] .highlight .gi { color: #589819 } /* Generic.Inserted */
-body[data-theme="dark"] .highlight .go { color: #cccccc } /* Generic.Output */
-body[data-theme="dark"] .highlight .gp { color: #aaaaaa } /* Generic.Prompt */
-body[data-theme="dark"] .highlight .gs { color: #d0d0d0; font-weight: bold } /* Generic.Strong */
-body[data-theme="dark"] .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */
-body[data-theme="dark"] .highlight .gt { color: #d22323 } /* Generic.Traceback */
-body[data-theme="dark"] .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */
-body[data-theme="dark"] .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */
-body[data-theme="dark"] .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */
-body[data-theme="dark"] .highlight .kp { color: #6ebf26 } /* Keyword.Pseudo */
-body[data-theme="dark"] .highlight .kr { color: #6ebf26; font-weight: bold } /* Keyword.Reserved */
-body[data-theme="dark"] .highlight .kt { color: #6ebf26; font-weight: bold } /* Keyword.Type */
-body[data-theme="dark"] .highlight .ld { color: #d0d0d0 } /* Literal.Date */
-body[data-theme="dark"] .highlight .m { color: #51b2fd } /* Literal.Number */
-body[data-theme="dark"] .highlight .s { color: #ed9d13 } /* Literal.String */
-body[data-theme="dark"] .highlight .na { color: #bbbbbb } /* Name.Attribute */
-body[data-theme="dark"] .highlight .nb { color: #2fbccd } /* Name.Builtin */
-body[data-theme="dark"] .highlight .nc { color: #71adff; text-decoration: underline } /* Name.Class */
-body[data-theme="dark"] .highlight .no { color: #40ffff } /* Name.Constant */
-body[data-theme="dark"] .highlight .nd { color: #ffa500 } /* Name.Decorator */
-body[data-theme="dark"] .highlight .ni { color: #d0d0d0 } /* Name.Entity */
-body[data-theme="dark"] .highlight .ne { color: #bbbbbb } /* Name.Exception */
-body[data-theme="dark"] .highlight .nf { color: #71adff } /* Name.Function */
-body[data-theme="dark"] .highlight .nl { color: #d0d0d0 } /* Name.Label */
-body[data-theme="dark"] .highlight .nn { color: #71adff; text-decoration: underline } /* Name.Namespace */
-body[data-theme="dark"] .highlight .nx { color: #d0d0d0 } /* Name.Other */
-body[data-theme="dark"] .highlight .py { color: #d0d0d0 } /* Name.Property */
-body[data-theme="dark"] .highlight .nt { color: #6ebf26; font-weight: bold } /* Name.Tag */
-body[data-theme="dark"] .highlight .nv { color: #40ffff } /* Name.Variable */
-body[data-theme="dark"] .highlight .ow { color: #6ebf26; font-weight: bold } /* Operator.Word */
-body[data-theme="dark"] .highlight .pm { color: #d0d0d0 } /* Punctuation.Marker */
-body[data-theme="dark"] .highlight .w { color: #666666 } /* Text.Whitespace */
-body[data-theme="dark"] .highlight .mb { color: #51b2fd } /* Literal.Number.Bin */
-body[data-theme="dark"] .highlight .mf { color: #51b2fd } /* Literal.Number.Float */
-body[data-theme="dark"] .highlight .mh { color: #51b2fd } /* Literal.Number.Hex */
-body[data-theme="dark"] .highlight .mi { color: #51b2fd } /* Literal.Number.Integer */
-body[data-theme="dark"] .highlight .mo { color: #51b2fd } /* Literal.Number.Oct */
-body[data-theme="dark"] .highlight .sa { color: #ed9d13 } /* Literal.String.Affix */
-body[data-theme="dark"] .highlight .sb { color: #ed9d13 } /* Literal.String.Backtick */
-body[data-theme="dark"] .highlight .sc { color: #ed9d13 } /* Literal.String.Char */
-body[data-theme="dark"] .highlight .dl { color: #ed9d13 } /* Literal.String.Delimiter */
-body[data-theme="dark"] .highlight .sd { color: #ed9d13 } /* Literal.String.Doc */
-body[data-theme="dark"] .highlight .s2 { color: #ed9d13 } /* Literal.String.Double */
-body[data-theme="dark"] .highlight .se { color: #ed9d13 } /* Literal.String.Escape */
-body[data-theme="dark"] .highlight .sh { color: #ed9d13 } /* Literal.String.Heredoc */
-body[data-theme="dark"] .highlight .si { color: #ed9d13 } /* Literal.String.Interpol */
-body[data-theme="dark"] .highlight .sx { color: #ffa500 } /* Literal.String.Other */
-body[data-theme="dark"] .highlight .sr { color: #ed9d13 } /* Literal.String.Regex */
-body[data-theme="dark"] .highlight .s1 { color: #ed9d13 } /* Literal.String.Single */
-body[data-theme="dark"] .highlight .ss { color: #ed9d13 } /* Literal.String.Symbol */
-body[data-theme="dark"] .highlight .bp { color: #2fbccd } /* Name.Builtin.Pseudo */
-body[data-theme="dark"] .highlight .fm { color: #71adff } /* Name.Function.Magic */
-body[data-theme="dark"] .highlight .vc { color: #40ffff } /* Name.Variable.Class */
-body[data-theme="dark"] .highlight .vg { color: #40ffff } /* Name.Variable.Global */
-body[data-theme="dark"] .highlight .vi { color: #40ffff } /* Name.Variable.Instance */
-body[data-theme="dark"] .highlight .vm { color: #40ffff } /* Name.Variable.Magic */
-body[data-theme="dark"] .highlight .il { color: #51b2fd } /* Literal.Number.Integer.Long */
-@media (prefers-color-scheme: dark) {
-body:not([data-theme="light"]) .highlight pre { line-height: 125%; }
-body:not([data-theme="light"]) .highlight td.linenos .normal { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-body:not([data-theme="light"]) .highlight span.linenos { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-body:not([data-theme="light"]) .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-body:not([data-theme="light"]) .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-body:not([data-theme="light"]) .highlight .hll { background-color: #404040 }
-body:not([data-theme="light"]) .highlight { background: #202020; color: #d0d0d0 }
-body:not([data-theme="light"]) .highlight .c { color: #ababab; font-style: italic } /* Comment */
-body:not([data-theme="light"]) .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
-body:not([data-theme="light"]) .highlight .esc { color: #d0d0d0 } /* Escape */
-body:not([data-theme="light"]) .highlight .g { color: #d0d0d0 } /* Generic */
-body:not([data-theme="light"]) .highlight .k { color: #6ebf26; font-weight: bold } /* Keyword */
-body:not([data-theme="light"]) .highlight .l { color: #d0d0d0 } /* Literal */
-body:not([data-theme="light"]) .highlight .n { color: #d0d0d0 } /* Name */
-body:not([data-theme="light"]) .highlight .o { color: #d0d0d0 } /* Operator */
-body:not([data-theme="light"]) .highlight .x { color: #d0d0d0 } /* Other */
-body:not([data-theme="light"]) .highlight .p { color: #d0d0d0 } /* Punctuation */
-body:not([data-theme="light"]) .highlight .ch { color: #ababab; font-style: italic } /* Comment.Hashbang */
-body:not([data-theme="light"]) .highlight .cm { color: #ababab; font-style: italic } /* Comment.Multiline */
-body:not([data-theme="light"]) .highlight .cp { color: #cd2828; font-weight: bold } /* Comment.Preproc */
-body:not([data-theme="light"]) .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */
-body:not([data-theme="light"]) .highlight .c1 { color: #ababab; font-style: italic } /* Comment.Single */
-body:not([data-theme="light"]) .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */
-body:not([data-theme="light"]) .highlight .gd { color: #d22323 } /* Generic.Deleted */
-body:not([data-theme="light"]) .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */
-body:not([data-theme="light"]) .highlight .gr { color: #d22323 } /* Generic.Error */
-body:not([data-theme="light"]) .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */
-body:not([data-theme="light"]) .highlight .gi { color: #589819 } /* Generic.Inserted */
-body:not([data-theme="light"]) .highlight .go { color: #cccccc } /* Generic.Output */
-body:not([data-theme="light"]) .highlight .gp { color: #aaaaaa } /* Generic.Prompt */
-body:not([data-theme="light"]) .highlight .gs { color: #d0d0d0; font-weight: bold } /* Generic.Strong */
-body:not([data-theme="light"]) .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */
-body:not([data-theme="light"]) .highlight .gt { color: #d22323 } /* Generic.Traceback */
-body:not([data-theme="light"]) .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */
-body:not([data-theme="light"]) .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */
-body:not([data-theme="light"]) .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */
-body:not([data-theme="light"]) .highlight .kp { color: #6ebf26 } /* Keyword.Pseudo */
-body:not([data-theme="light"]) .highlight .kr { color: #6ebf26; font-weight: bold } /* Keyword.Reserved */
-body:not([data-theme="light"]) .highlight .kt { color: #6ebf26; font-weight: bold } /* Keyword.Type */
-body:not([data-theme="light"]) .highlight .ld { color: #d0d0d0 } /* Literal.Date */
-body:not([data-theme="light"]) .highlight .m { color: #51b2fd } /* Literal.Number */
-body:not([data-theme="light"]) .highlight .s { color: #ed9d13 } /* Literal.String */
-body:not([data-theme="light"]) .highlight .na { color: #bbbbbb } /* Name.Attribute */
-body:not([data-theme="light"]) .highlight .nb { color: #2fbccd } /* Name.Builtin */
-body:not([data-theme="light"]) .highlight .nc { color: #71adff; text-decoration: underline } /* Name.Class */
-body:not([data-theme="light"]) .highlight .no { color: #40ffff } /* Name.Constant */
-body:not([data-theme="light"]) .highlight .nd { color: #ffa500 } /* Name.Decorator */
-body:not([data-theme="light"]) .highlight .ni { color: #d0d0d0 } /* Name.Entity */
-body:not([data-theme="light"]) .highlight .ne { color: #bbbbbb } /* Name.Exception */
-body:not([data-theme="light"]) .highlight .nf { color: #71adff } /* Name.Function */
-body:not([data-theme="light"]) .highlight .nl { color: #d0d0d0 } /* Name.Label */
-body:not([data-theme="light"]) .highlight .nn { color: #71adff; text-decoration: underline } /* Name.Namespace */
-body:not([data-theme="light"]) .highlight .nx { color: #d0d0d0 } /* Name.Other */
-body:not([data-theme="light"]) .highlight .py { color: #d0d0d0 } /* Name.Property */
-body:not([data-theme="light"]) .highlight .nt { color: #6ebf26; font-weight: bold } /* Name.Tag */
-body:not([data-theme="light"]) .highlight .nv { color: #40ffff } /* Name.Variable */
-body:not([data-theme="light"]) .highlight .ow { color: #6ebf26; font-weight: bold } /* Operator.Word */
-body:not([data-theme="light"]) .highlight .pm { color: #d0d0d0 } /* Punctuation.Marker */
-body:not([data-theme="light"]) .highlight .w { color: #666666 } /* Text.Whitespace */
-body:not([data-theme="light"]) .highlight .mb { color: #51b2fd } /* Literal.Number.Bin */
-body:not([data-theme="light"]) .highlight .mf { color: #51b2fd } /* Literal.Number.Float */
-body:not([data-theme="light"]) .highlight .mh { color: #51b2fd } /* Literal.Number.Hex */
-body:not([data-theme="light"]) .highlight .mi { color: #51b2fd } /* Literal.Number.Integer */
-body:not([data-theme="light"]) .highlight .mo { color: #51b2fd } /* Literal.Number.Oct */
-body:not([data-theme="light"]) .highlight .sa { color: #ed9d13 } /* Literal.String.Affix */
-body:not([data-theme="light"]) .highlight .sb { color: #ed9d13 } /* Literal.String.Backtick */
-body:not([data-theme="light"]) .highlight .sc { color: #ed9d13 } /* Literal.String.Char */
-body:not([data-theme="light"]) .highlight .dl { color: #ed9d13 } /* Literal.String.Delimiter */
-body:not([data-theme="light"]) .highlight .sd { color: #ed9d13 } /* Literal.String.Doc */
-body:not([data-theme="light"]) .highlight .s2 { color: #ed9d13 } /* Literal.String.Double */
-body:not([data-theme="light"]) .highlight .se { color: #ed9d13 } /* Literal.String.Escape */
-body:not([data-theme="light"]) .highlight .sh { color: #ed9d13 } /* Literal.String.Heredoc */
-body:not([data-theme="light"]) .highlight .si { color: #ed9d13 } /* Literal.String.Interpol */
-body:not([data-theme="light"]) .highlight .sx { color: #ffa500 } /* Literal.String.Other */
-body:not([data-theme="light"]) .highlight .sr { color: #ed9d13 } /* Literal.String.Regex */
-body:not([data-theme="light"]) .highlight .s1 { color: #ed9d13 } /* Literal.String.Single */
-body:not([data-theme="light"]) .highlight .ss { color: #ed9d13 } /* Literal.String.Symbol */
-body:not([data-theme="light"]) .highlight .bp { color: #2fbccd } /* Name.Builtin.Pseudo */
-body:not([data-theme="light"]) .highlight .fm { color: #71adff } /* Name.Function.Magic */
-body:not([data-theme="light"]) .highlight .vc { color: #40ffff } /* Name.Variable.Class */
-body:not([data-theme="light"]) .highlight .vg { color: #40ffff } /* Name.Variable.Global */
-body:not([data-theme="light"]) .highlight .vi { color: #40ffff } /* Name.Variable.Instance */
-body:not([data-theme="light"]) .highlight .vm { color: #40ffff } /* Name.Variable.Magic */
-body:not([data-theme="light"]) .highlight .il { color: #51b2fd } /* Literal.Number.Integer.Long */
+  body[data-theme="dark"] .highlight pre {
+    line-height: 125%;
+  }
+  body[data-theme="dark"] .highlight td.linenos .normal {
+    color: #aaaaaa;
+    background-color: transparent;
+    padding-left: 5px;
+    padding-right: 5px;
+  }
+  body[data-theme="dark"] .highlight span.linenos {
+    color: #aaaaaa;
+    background-color: transparent;
+    padding-left: 5px;
+    padding-right: 5px;
+  }
+  body[data-theme="dark"] .highlight td.linenos .special {
+    color: #000000;
+    background-color: #ffffc0;
+    padding-left: 5px;
+    padding-right: 5px;
+  }
+  body[data-theme="dark"] .highlight span.linenos.special {
+    color: #000000;
+    background-color: #ffffc0;
+    padding-left: 5px;
+    padding-right: 5px;
+  }
+  body[data-theme="dark"] .highlight .hll {
+    background-color: #404040;
+  }
+  body[data-theme="dark"] .highlight {
+    background: #202020;
+    color: #d0d0d0;
+  }
+  body[data-theme="dark"] .highlight .c {
+    color: #ababab;
+    font-style: italic;
+  } /* Comment */
+  body[data-theme="dark"] .highlight .err {
+    color: #a61717;
+    background-color: #e3d2d2;
+  } /* Error */
+  body[data-theme="dark"] .highlight .esc {
+    color: #d0d0d0;
+  } /* Escape */
+  body[data-theme="dark"] .highlight .g {
+    color: #d0d0d0;
+  } /* Generic */
+  body[data-theme="dark"] .highlight .k {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword */
+  body[data-theme="dark"] .highlight .l {
+    color: #d0d0d0;
+  } /* Literal */
+  body[data-theme="dark"] .highlight .n {
+    color: #d0d0d0;
+  } /* Name */
+  body[data-theme="dark"] .highlight .o {
+    color: #d0d0d0;
+  } /* Operator */
+  body[data-theme="dark"] .highlight .x {
+    color: #d0d0d0;
+  } /* Other */
+  body[data-theme="dark"] .highlight .p {
+    color: #d0d0d0;
+  } /* Punctuation */
+  body[data-theme="dark"] .highlight .ch {
+    color: #ababab;
+    font-style: italic;
+  } /* Comment.Hashbang */
+  body[data-theme="dark"] .highlight .cm {
+    color: #ababab;
+    font-style: italic;
+  } /* Comment.Multiline */
+  body[data-theme="dark"] .highlight .cp {
+    color: #ff3a3a;
+    font-weight: bold;
+  } /* Comment.Preproc */
+  body[data-theme="dark"] .highlight .cpf {
+    color: #ababab;
+    font-style: italic;
+  } /* Comment.PreprocFile */
+  body[data-theme="dark"] .highlight .c1 {
+    color: #ababab;
+    font-style: italic;
+  } /* Comment.Single */
+  body[data-theme="dark"] .highlight .cs {
+    color: #e50808;
+    font-weight: bold;
+    background-color: #520000;
+  } /* Comment.Special */
+  body[data-theme="dark"] .highlight .gd {
+    color: #d22323;
+  } /* Generic.Deleted */
+  body[data-theme="dark"] .highlight .ge {
+    color: #d0d0d0;
+    font-style: italic;
+  } /* Generic.Emph */
+  body[data-theme="dark"] .highlight .ges {
+    color: #d0d0d0;
+    font-weight: bold;
+    font-style: italic;
+  } /* Generic.EmphStrong */
+  body[data-theme="dark"] .highlight .gr {
+    color: #d22323;
+  } /* Generic.Error */
+  body[data-theme="dark"] .highlight .gh {
+    color: #ffffff;
+    font-weight: bold;
+  } /* Generic.Heading */
+  body[data-theme="dark"] .highlight .gi {
+    color: #589819;
+  } /* Generic.Inserted */
+  body[data-theme="dark"] .highlight .go {
+    color: #cccccc;
+  } /* Generic.Output */
+  body[data-theme="dark"] .highlight .gp {
+    color: #aaaaaa;
+  } /* Generic.Prompt */
+  body[data-theme="dark"] .highlight .gs {
+    color: #d0d0d0;
+    font-weight: bold;
+  } /* Generic.Strong */
+  body[data-theme="dark"] .highlight .gu {
+    color: #ffffff;
+    text-decoration: underline;
+  } /* Generic.Subheading */
+  body[data-theme="dark"] .highlight .gt {
+    color: #d22323;
+  } /* Generic.Traceback */
+  body[data-theme="dark"] .highlight .kc {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword.Constant */
+  body[data-theme="dark"] .highlight .kd {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword.Declaration */
+  body[data-theme="dark"] .highlight .kn {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword.Namespace */
+  body[data-theme="dark"] .highlight .kp {
+    color: #6ebf26;
+  } /* Keyword.Pseudo */
+  body[data-theme="dark"] .highlight .kr {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword.Reserved */
+  body[data-theme="dark"] .highlight .kt {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Keyword.Type */
+  body[data-theme="dark"] .highlight .ld {
+    color: #d0d0d0;
+  } /* Literal.Date */
+  body[data-theme="dark"] .highlight .m {
+    color: #51b2fd;
+  } /* Literal.Number */
+  body[data-theme="dark"] .highlight .s {
+    color: #ed9d13;
+  } /* Literal.String */
+  body[data-theme="dark"] .highlight .na {
+    color: #bbbbbb;
+  } /* Name.Attribute */
+  body[data-theme="dark"] .highlight .nb {
+    color: #2fbccd;
+  } /* Name.Builtin */
+  body[data-theme="dark"] .highlight .nc {
+    color: #71adff;
+    text-decoration: underline;
+  } /* Name.Class */
+  body[data-theme="dark"] .highlight .no {
+    color: #40ffff;
+  } /* Name.Constant */
+  body[data-theme="dark"] .highlight .nd {
+    color: #ffa500;
+  } /* Name.Decorator */
+  body[data-theme="dark"] .highlight .ni {
+    color: #d0d0d0;
+  } /* Name.Entity */
+  body[data-theme="dark"] .highlight .ne {
+    color: #bbbbbb;
+  } /* Name.Exception */
+  body[data-theme="dark"] .highlight .nf {
+    color: #71adff;
+  } /* Name.Function */
+  body[data-theme="dark"] .highlight .nl {
+    color: #d0d0d0;
+  } /* Name.Label */
+  body[data-theme="dark"] .highlight .nn {
+    color: #71adff;
+    text-decoration: underline;
+  } /* Name.Namespace */
+  body[data-theme="dark"] .highlight .nx {
+    color: #d0d0d0;
+  } /* Name.Other */
+  body[data-theme="dark"] .highlight .py {
+    color: #d0d0d0;
+  } /* Name.Property */
+  body[data-theme="dark"] .highlight .nt {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Name.Tag */
+  body[data-theme="dark"] .highlight .nv {
+    color: #40ffff;
+  } /* Name.Variable */
+  body[data-theme="dark"] .highlight .ow {
+    color: #6ebf26;
+    font-weight: bold;
+  } /* Operator.Word */
+  body[data-theme="dark"] .highlight .pm {
+    color: #d0d0d0;
+  } /* Punctuation.Marker */
+  body[data-theme="dark"] .highlight .w {
+    color: #666666;
+  } /* Text.Whitespace */
+  body[data-theme="dark"] .highlight .mb {
+    color: #51b2fd;
+  } /* Literal.Number.Bin */
+  body[data-theme="dark"] .highlight .mf {
+    color: #51b2fd;
+  } /* Literal.Number.Float */
+  body[data-theme="dark"] .highlight .mh {
+    color: #51b2fd;
+  } /* Literal.Number.Hex */
+  body[data-theme="dark"] .highlight .mi {
+    color: #51b2fd;
+  } /* Literal.Number.Integer */
+  body[data-theme="dark"] .highlight .mo {
+    color: #51b2fd;
+  } /* Literal.Number.Oct */
+  body[data-theme="dark"] .highlight .sa {
+    color: #ed9d13;
+  } /* Literal.String.Affix */
+  body[data-theme="dark"] .highlight .sb {
+    color: #ed9d13;
+  } /* Literal.String.Backtick */
+  body[data-theme="dark"] .highlight .sc {
+    color: #ed9d13;
+  } /* Literal.String.Char */
+  body[data-theme="dark"] .highlight .dl {
+    color: #ed9d13;
+  } /* Literal.String.Delimiter */
+  body[data-theme="dark"] .highlight .sd {
+    color: #ed9d13;
+  } /* Literal.String.Doc */
+  body[data-theme="dark"] .highlight .s2 {
+    color: #ed9d13;
+  } /* Literal.String.Double */
+  body[data-theme="dark"] .highlight .se {
+    color: #ed9d13;
+  } /* Literal.String.Escape */
+  body[data-theme="dark"] .highlight .sh {
+    color: #ed9d13;
+  } /* Literal.String.Heredoc */
+  body[data-theme="dark"] .highlight .si {
+    color: #ed9d13;
+  } /* Literal.String.Interpol */
+  body[data-theme="dark"] .highlight .sx {
+    color: #ffa500;
+  } /* Literal.String.Other */
+  body[data-theme="dark"] .highlight .sr {
+    color: #ed9d13;
+  } /* Literal.String.Regex */
+  body[data-theme="dark"] .highlight .s1 {
+    color: #ed9d13;
+  } /* Literal.String.Single */
+  body[data-theme="dark"] .highlight .ss {
+    color: #ed9d13;
+  } /* Literal.String.Symbol */
+  body[data-theme="dark"] .highlight .bp {
+    color: #2fbccd;
+  } /* Name.Builtin.Pseudo */
+  body[data-theme="dark"] .highlight .fm {
+    color: #71adff;
+  } /* Name.Function.Magic */
+  body[data-theme="dark"] .highlight .vc {
+    color: #40ffff;
+  } /* Name.Variable.Class */
+  body[data-theme="dark"] .highlight .vg {
+    color: #40ffff;
+  } /* Name.Variable.Global */
+  body[data-theme="dark"] .highlight .vi {
+    color: #40ffff;
+  } /* Name.Variable.Instance */
+  body[data-theme="dark"] .highlight .vm {
+    color: #40ffff;
+  } /* Name.Variable.Magic */
+  body[data-theme="dark"] .highlight .il {
+    color: #51b2fd;
+  } /* Literal.Number.Integer.Long */
+  @media (prefers-color-scheme: dark) {
+    body:not([data-theme="light"]) .highlight pre {
+      line-height: 125%;
+    }
+    body:not([data-theme="light"]) .highlight td.linenos .normal {
+      color: #aaaaaa;
+      background-color: transparent;
+      padding-left: 5px;
+      padding-right: 5px;
+    }
+    body:not([data-theme="light"]) .highlight span.linenos {
+      color: #aaaaaa;
+      background-color: transparent;
+      padding-left: 5px;
+      padding-right: 5px;
+    }
+    body:not([data-theme="light"]) .highlight td.linenos .special {
+      color: #000000;
+      background-color: #ffffc0;
+      padding-left: 5px;
+      padding-right: 5px;
+    }
+    body:not([data-theme="light"]) .highlight span.linenos.special {
+      color: #000000;
+      background-color: #ffffc0;
+      padding-left: 5px;
+      padding-right: 5px;
+    }
+    body:not([data-theme="light"]) .highlight .hll {
+      background-color: #404040;
+    }
+    body:not([data-theme="light"]) .highlight {
+      background: #202020;
+      color: #d0d0d0;
+    }
+    body:not([data-theme="light"]) .highlight .c {
+      color: #ababab;
+      font-style: italic;
+    } /* Comment */
+    body:not([data-theme="light"]) .highlight .err {
+      color: #a61717;
+      background-color: #e3d2d2;
+    } /* Error */
+    body:not([data-theme="light"]) .highlight .esc {
+      color: #d0d0d0;
+    } /* Escape */
+    body:not([data-theme="light"]) .highlight .g {
+      color: #d0d0d0;
+    } /* Generic */
+    body:not([data-theme="light"]) .highlight .k {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword */
+    body:not([data-theme="light"]) .highlight .l {
+      color: #d0d0d0;
+    } /* Literal */
+    body:not([data-theme="light"]) .highlight .n {
+      color: #d0d0d0;
+    } /* Name */
+    body:not([data-theme="light"]) .highlight .o {
+      color: #d0d0d0;
+    } /* Operator */
+    body:not([data-theme="light"]) .highlight .x {
+      color: #d0d0d0;
+    } /* Other */
+    body:not([data-theme="light"]) .highlight .p {
+      color: #d0d0d0;
+    } /* Punctuation */
+    body:not([data-theme="light"]) .highlight .ch {
+      color: #ababab;
+      font-style: italic;
+    } /* Comment.Hashbang */
+    body:not([data-theme="light"]) .highlight .cm {
+      color: #ababab;
+      font-style: italic;
+    } /* Comment.Multiline */
+    body:not([data-theme="light"]) .highlight .cp {
+      color: #ff3a3a;
+      font-weight: bold;
+    } /* Comment.Preproc */
+    body:not([data-theme="light"]) .highlight .cpf {
+      color: #ababab;
+      font-style: italic;
+    } /* Comment.PreprocFile */
+    body:not([data-theme="light"]) .highlight .c1 {
+      color: #ababab;
+      font-style: italic;
+    } /* Comment.Single */
+    body:not([data-theme="light"]) .highlight .cs {
+      color: #e50808;
+      font-weight: bold;
+      background-color: #520000;
+    } /* Comment.Special */
+    body:not([data-theme="light"]) .highlight .gd {
+      color: #d22323;
+    } /* Generic.Deleted */
+    body:not([data-theme="light"]) .highlight .ge {
+      color: #d0d0d0;
+      font-style: italic;
+    } /* Generic.Emph */
+    body:not([data-theme="light"]) .highlight .ges {
+      color: #d0d0d0;
+      font-weight: bold;
+      font-style: italic;
+    } /* Generic.EmphStrong */
+    body:not([data-theme="light"]) .highlight .gr {
+      color: #d22323;
+    } /* Generic.Error */
+    body:not([data-theme="light"]) .highlight .gh {
+      color: #ffffff;
+      font-weight: bold;
+    } /* Generic.Heading */
+    body:not([data-theme="light"]) .highlight .gi {
+      color: #589819;
+    } /* Generic.Inserted */
+    body:not([data-theme="light"]) .highlight .go {
+      color: #cccccc;
+    } /* Generic.Output */
+    body:not([data-theme="light"]) .highlight .gp {
+      color: #aaaaaa;
+    } /* Generic.Prompt */
+    body:not([data-theme="light"]) .highlight .gs {
+      color: #d0d0d0;
+      font-weight: bold;
+    } /* Generic.Strong */
+    body:not([data-theme="light"]) .highlight .gu {
+      color: #ffffff;
+      text-decoration: underline;
+    } /* Generic.Subheading */
+    body:not([data-theme="light"]) .highlight .gt {
+      color: #d22323;
+    } /* Generic.Traceback */
+    body:not([data-theme="light"]) .highlight .kc {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword.Constant */
+    body:not([data-theme="light"]) .highlight .kd {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword.Declaration */
+    body:not([data-theme="light"]) .highlight .kn {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword.Namespace */
+    body:not([data-theme="light"]) .highlight .kp {
+      color: #6ebf26;
+    } /* Keyword.Pseudo */
+    body:not([data-theme="light"]) .highlight .kr {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword.Reserved */
+    body:not([data-theme="light"]) .highlight .kt {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Keyword.Type */
+    body:not([data-theme="light"]) .highlight .ld {
+      color: #d0d0d0;
+    } /* Literal.Date */
+    body:not([data-theme="light"]) .highlight .m {
+      color: #51b2fd;
+    } /* Literal.Number */
+    body:not([data-theme="light"]) .highlight .s {
+      color: #ed9d13;
+    } /* Literal.String */
+    body:not([data-theme="light"]) .highlight .na {
+      color: #bbbbbb;
+    } /* Name.Attribute */
+    body:not([data-theme="light"]) .highlight .nb {
+      color: #2fbccd;
+    } /* Name.Builtin */
+    body:not([data-theme="light"]) .highlight .nc {
+      color: #71adff;
+      text-decoration: underline;
+    } /* Name.Class */
+    body:not([data-theme="light"]) .highlight .no {
+      color: #40ffff;
+    } /* Name.Constant */
+    body:not([data-theme="light"]) .highlight .nd {
+      color: #ffa500;
+    } /* Name.Decorator */
+    body:not([data-theme="light"]) .highlight .ni {
+      color: #d0d0d0;
+    } /* Name.Entity */
+    body:not([data-theme="light"]) .highlight .ne {
+      color: #bbbbbb;
+    } /* Name.Exception */
+    body:not([data-theme="light"]) .highlight .nf {
+      color: #71adff;
+    } /* Name.Function */
+    body:not([data-theme="light"]) .highlight .nl {
+      color: #d0d0d0;
+    } /* Name.Label */
+    body:not([data-theme="light"]) .highlight .nn {
+      color: #71adff;
+      text-decoration: underline;
+    } /* Name.Namespace */
+    body:not([data-theme="light"]) .highlight .nx {
+      color: #d0d0d0;
+    } /* Name.Other */
+    body:not([data-theme="light"]) .highlight .py {
+      color: #d0d0d0;
+    } /* Name.Property */
+    body:not([data-theme="light"]) .highlight .nt {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Name.Tag */
+    body:not([data-theme="light"]) .highlight .nv {
+      color: #40ffff;
+    } /* Name.Variable */
+    body:not([data-theme="light"]) .highlight .ow {
+      color: #6ebf26;
+      font-weight: bold;
+    } /* Operator.Word */
+    body:not([data-theme="light"]) .highlight .pm {
+      color: #d0d0d0;
+    } /* Punctuation.Marker */
+    body:not([data-theme="light"]) .highlight .w {
+      color: #666666;
+    } /* Text.Whitespace */
+    body:not([data-theme="light"]) .highlight .mb {
+      color: #51b2fd;
+    } /* Literal.Number.Bin */
+    body:not([data-theme="light"]) .highlight .mf {
+      color: #51b2fd;
+    } /* Literal.Number.Float */
+    body:not([data-theme="light"]) .highlight .mh {
+      color: #51b2fd;
+    } /* Literal.Number.Hex */
+    body:not([data-theme="light"]) .highlight .mi {
+      color: #51b2fd;
+    } /* Literal.Number.Integer */
+    body:not([data-theme="light"]) .highlight .mo {
+      color: #51b2fd;
+    } /* Literal.Number.Oct */
+    body:not([data-theme="light"]) .highlight .sa {
+      color: #ed9d13;
+    } /* Literal.String.Affix */
+    body:not([data-theme="light"]) .highlight .sb {
+      color: #ed9d13;
+    } /* Literal.String.Backtick */
+    body:not([data-theme="light"]) .highlight .sc {
+      color: #ed9d13;
+    } /* Literal.String.Char */
+    body:not([data-theme="light"]) .highlight .dl {
+      color: #ed9d13;
+    } /* Literal.String.Delimiter */
+    body:not([data-theme="light"]) .highlight .sd {
+      color: #ed9d13;
+    } /* Literal.String.Doc */
+    body:not([data-theme="light"]) .highlight .s2 {
+      color: #ed9d13;
+    } /* Literal.String.Double */
+    body:not([data-theme="light"]) .highlight .se {
+      color: #ed9d13;
+    } /* Literal.String.Escape */
+    body:not([data-theme="light"]) .highlight .sh {
+      color: #ed9d13;
+    } /* Literal.String.Heredoc */
+    body:not([data-theme="light"]) .highlight .si {
+      color: #ed9d13;
+    } /* Literal.String.Interpol */
+    body:not([data-theme="light"]) .highlight .sx {
+      color: #ffa500;
+    } /* Literal.String.Other */
+    body:not([data-theme="light"]) .highlight .sr {
+      color: #ed9d13;
+    } /* Literal.String.Regex */
+    body:not([data-theme="light"]) .highlight .s1 {
+      color: #ed9d13;
+    } /* Literal.String.Single */
+    body:not([data-theme="light"]) .highlight .ss {
+      color: #ed9d13;
+    } /* Literal.String.Symbol */
+    body:not([data-theme="light"]) .highlight .bp {
+      color: #2fbccd;
+    } /* Name.Builtin.Pseudo */
+    body:not([data-theme="light"]) .highlight .fm {
+      color: #71adff;
+    } /* Name.Function.Magic */
+    body:not([data-theme="light"]) .highlight .vc {
+      color: #40ffff;
+    } /* Name.Variable.Class */
+    body:not([data-theme="light"]) .highlight .vg {
+      color: #40ffff;
+    } /* Name.Variable.Global */
+    body:not([data-theme="light"]) .highlight .vi {
+      color: #40ffff;
+    } /* Name.Variable.Instance */
+    body:not([data-theme="light"]) .highlight .vm {
+      color: #40ffff;
+    } /* Name.Variable.Magic */
+    body:not([data-theme="light"]) .highlight .il {
+      color: #51b2fd;
+    } /* Literal.Number.Integer.Long */
+  }
 }
-}
\ No newline at end of file
diff --git a/docs/build/html/about.html b/docs/build/html/about.html
index d4a5bb72..e1ecdb5d 100644
--- a/docs/build/html/about.html
+++ b/docs/build/html/about.html
@@ -1,251 +1,526 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="#" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Custom Usage" href="custom_usage.html" /><link rel="prev" title="&lt;no title&gt;" href="index.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="#" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="Custom Usage" href="custom_usage.html" />
+    <link rel="prev" title="&lt;no title&gt;" href="index.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Skills Extractor - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>Skills Extractor - Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
-
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
-
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
 
-</div>
-</div>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="skills-extractor">
-<h1>Skills Extractor<a class="headerlink" href="#skills-extractor" title="Permalink to this heading">#</a></h1>
-<ul class="simple">
-<li><p><span class="xref myst">Installation</span></p></li>
-<li><p><span class="xref myst">Using Nesta’s Skills Extractor library</span></p></li>
-<li><p><span class="xref myst">Development</span></p></li>
-</ul>
-<div class="section" id="welcome-to-nesta-s-skills-extractor-library">
-<h2>Welcome to Nesta’s Skills Extractor Library<a class="headerlink" href="#welcome-to-nesta-s-skills-extractor-library" title="Permalink to this heading">#</a></h2>
-<p>Welcome to the documentation of Nesta’s skills extractor library.</p>
-<p>This page contains information on how to install and use Nesta’s skills extraction library. The skills library allows you to extract skills phrases from job advertisement texts and maps them onto a skills taxonomy of your choice.</p>
-<p><img alt="" src="_images/highlevel_example.png" /></p>
-<p>We currently support three different taxonomies to map onto: the <a class="reference external" href="https://esco.ec.europa.eu/en/about-esco/what-esco">European Commission’s European Skills, Competences, and Occupations (ESCO)</a>, <a class="reference external" href="https://skills.lightcast.io/">Lightcast’s Open Skills</a> and a “toy” taxonomy developed internally for the purpose of testing.</p>
-<p>If you’d like to learn more about the models used in the library, please refer to the <a class="reference external" href="https://nestauk.github.io/ojd_daps_skills/build/html/model_card.html">model card page</a>.</p>
-<p>You may also want to read more about the wider project by reading:</p>
-<ol class="arabic simple">
-<li><p>Our <a class="reference external" href="https://www.escoe.ac.uk/the-skills-extractor-library">Introduction blog</a></p></li>
-<li><p>Our <a class="reference external" href="https://www.nesta.org.uk/data-visualisation-and-interactive/exploring-uk-skills-demand/">interactive analysis blog</a></p></li>
-</ol>
-</div>
-<div class="section" id="installation-a-name-installation-a">
-<h2>Installation <a name="installation"></a><a class="headerlink" href="#installation-a-name-installation-a" title="Permalink to this heading">#</a></h2>
-<p>You can use pip to install the library:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">ojd</span><span class="o">-</span><span class="n">daps</span><span class="o">-</span><span class="n">skills</span>
-</pre></div>
-</div>
-<p>You will also need to download <a class="reference external" href="https://spacy.io/models/en">spaCy’s</a> <code class="docutils literal notranslate"><span class="pre">en_core_web_sm</span></code> model:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">python</span> <span class="o">-</span><span class="n">m</span> <span class="n">spacy</span> <span class="n">download</span> <span class="n">en_core_web_sm</span>
-</pre></div>
-</div>
-<div class="section" id="aws-cli">
-<h3>AWS CLI<a class="headerlink" href="#aws-cli" title="Permalink to this heading">#</a></h3>
-<p>When the package is first used it will automatically download a folder of neccessary data and models. This file is ~ 1GB. Although you don’t need to have AWS credentials for this to work, you will need to download the <a class="reference external" href="https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html">AWS CLI</a>.</p>
-</div>
-</div>
-<div class="section" id="tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a">
-<h2>TL;DR: Using Nesta’s Skills Extractor library <a name="usage"></a><a class="headerlink" href="#tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a" title="Permalink to this heading">#</a></h2>
-<p>The library supports three key skills extraction functionalities :</p>
-<ol class="arabic simple">
-<li><p>Extract AND map skills to a taxonomy of your choice;</p></li>
-<li><p>Extract skills from job adverts;</p></li>
-<li><p>Map a list of skills to a taxonomy of your choice.</p></li>
-</ol>
-<p>The option <code class="docutils literal notranslate"><span class="pre">local=False</span></code> can only be used by those with access to Nesta’s S3 bucket.</p>
-<div class="section" id="extract-and-map-skills">
-<h3>1. Extract AND map skills<a class="headerlink" href="#extract-and-map-skills" title="Permalink to this heading">#</a></h3>
-<p>If you would like to extract AND map skills in one step, you are able to do so with the <code class="docutils literal notranslate"><span class="pre">extract_skills</span></code> method.</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
+              </div>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
+            </a>
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
+            </div>
+            <article role="main">
+              <section id="skills-extractor">
+                <h1>
+                  Skills Extractor<a
+                    class="headerlink"
+                    href="#skills-extractor"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <ul class="simple">
+                  <li>
+                    <p>
+                      <a class="reference internal" href="#installation"
+                        ><span class="xref myst">Installation</span></a
+                      >
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      <a class="reference internal" href="#usage"
+                        ><span class="xref myst"
+                          >Using Nesta’s Skills Extractor library</span
+                        ></a
+                      >
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      <a class="reference internal" href="#development"
+                        ><span class="xref myst">Development</span></a
+                      >
+                    </p>
+                  </li>
+                </ul>
+                <section id="welcome-to-nesta-s-skills-extractor-library">
+                  <h2>
+                    Welcome to Nesta’s Skills Extractor Library<a
+                      class="headerlink"
+                      href="#welcome-to-nesta-s-skills-extractor-library"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    Welcome to the documentation of Nesta’s skills extractor
+                    library.
+                  </p>
+                  <p>
+                    This page contains information on how to install and use
+                    Nesta’s skills extraction library. The skills library allows
+                    you to extract skills phrases from job advertisement texts
+                    and maps them onto a skills taxonomy of your choice.
+                  </p>
+                  <p><img alt="" src="_images/highlevel_example.png" /></p>
+                  <p>
+                    We currently support three different taxonomies to map onto:
+                    the
+                    <a
+                      class="reference external"
+                      href="https://esco.ec.europa.eu/en/about-esco/what-esco"
+                      >European Commission’s European Skills, Competences, and
+                      Occupations (ESCO)</a
+                    >,
+                    <a
+                      class="reference external"
+                      href="https://skills.lightcast.io/"
+                      >Lightcast’s Open Skills</a
+                    >
+                    and a “toy” taxonomy developed internally for the purpose of
+                    testing.
+                  </p>
+                  <p>
+                    If you’d like to learn more about the models used in the
+                    library, please refer to the
+                    <a
+                      class="reference external"
+                      href="https://nestauk.github.io/ojd_daps_skills/build/html/model_card.html"
+                      >model card page</a
+                    >.
+                  </p>
+                  <p>
+                    You may also want to read more about the wider project by
+                    reading:
+                  </p>
+                  <ol class="arabic simple">
+                    <li>
+                      <p>
+                        Our
+                        <a
+                          class="reference external"
+                          href="https://www.escoe.ac.uk/the-skills-extractor-library"
+                          >Introduction blog</a
+                        >
+                      </p>
+                    </li>
+                    <li>
+                      <p>
+                        Our
+                        <a
+                          class="reference external"
+                          href="https://www.nesta.org.uk/data-visualisation-and-interactive/exploring-uk-skills-demand/"
+                          >interactive analysis blog</a
+                        >
+                      </p>
+                    </li>
+                  </ol>
+                </section>
+                <section id="installation">
+                  <h2>
+                    Installation <a name="installation"></a
+                    ><a
+                      class="headerlink"
+                      href="#installation"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>You can use pip to install the library:</p>
+                  <div class="highlight-default notranslate">
+                    <div class="highlight">
+                      <pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">ojd</span><span class="o">-</span><span class="n">daps</span><span class="o">-</span><span class="n">skills</span>
+</pre>
+                    </div>
+                  </div>
+                  <p>
+                    Note that this package was developed on macOS and tested on
+                    Ubuntu. Changes have been made for compatibility with
+                    Windows, but this is untested and cannot be guaranteed.
+                  </p>
+                  <p>
+                    When the package is first used it will automatically
+                    download a folder of necessary data and models (~1GB).
+                  </p>
+                </section>
+                <section id="tl-dr-using-nesta-s-skills-extractor-library">
+                  <h2>
+                    TL;DR: Using Nesta’s Skills Extractor library
+                    <a name="usage"></a
+                    ><a
+                      class="headerlink"
+                      href="#tl-dr-using-nesta-s-skills-extractor-library"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    The library supports three key skills extraction
+                    functionalities:
+                  </p>
+                  <ol class="arabic simple">
+                    <li>
+                      <p>
+                        Extract AND map skills to a taxonomy of your choice;
+                      </p>
+                    </li>
+                    <li><p>Extract skills from job adverts;</p></li>
+                    <li>
+                      <p>Map a list of skills to a taxonomy of your choice.</p>
+                    </li>
+                  </ol>
+                  <p>
+                    The option
+                    <code class="docutils literal notranslate"
+                      ><span class="pre">local=False</span></code
+                    >
+                    can only be used by those with access to Nesta’s S3 bucket.
+                  </p>
+                  <section id="extract-and-map-skills">
+                    <h3>
+                      1. Extract AND map skills<a
+                        class="headerlink"
+                        href="#extract-and-map-skills"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      If you would like to extract AND map skills in one step,
+                      you are able to do so with the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">extract_skills</span></code
+                      >
+                      method.
+                    </p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
 
 <span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;extract_skills_toy&quot;</span><span class="p">,</span> <span class="n">local</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1">#instantiate with toy taxonomy configuration file</span>
 
@@ -257,18 +532,34 @@ <h3>1. Extract AND map skills<a class="headerlink" href="#extract-and-map-skills
 <span class="p">]</span> <span class="c1">#toy job advert examples</span>
 
 <span class="n">job_skills_matched</span> <span class="o">=</span> <span class="n">es</span><span class="o">.</span><span class="n">extract_skills</span><span class="p">(</span><span class="n">job_adverts</span><span class="p">)</span> <span class="c1">#match and extract skills to toy taxonomy</span>
-</pre></div>
-</div>
-<p>The outputs are as follows:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">job_skills_matched</span>
+</pre>
+                      </div>
+                    </div>
+                    <p>The outputs are as follows:</p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="n">job_skills_matched</span>
 <span class="o">&gt;&gt;&gt;</span> <span class="p">[{</span><span class="s1">&#39;SKILL&#39;</span><span class="p">:</span> <span class="p">[(</span><span class="s1">&#39;communication skills&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;communication, collaboration and creativity&#39;</span><span class="p">,</span> <span class="s1">&#39;S1&#39;</span><span class="p">)),</span> <span class="p">(</span><span class="s1">&#39;maths skills&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;working with computers&#39;</span><span class="p">,</span> <span class="s1">&#39;S5&#39;</span><span class="p">))]},</span> <span class="p">{</span><span class="s1">&#39;SKILL&#39;</span><span class="p">:</span> <span class="p">[(</span><span class="s1">&#39;Excel skills&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;working with computers&#39;</span><span class="p">,</span> <span class="s1">&#39;S5&#39;</span><span class="p">)),</span> <span class="p">(</span><span class="s1">&#39;presentation skills&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;communication, collaboration and creativity&#39;</span><span class="p">,</span> <span class="s1">&#39;S1&#39;</span><span class="p">))]}]</span>
-</pre></div>
-</div>
-</div>
-<div class="section" id="extract-skills">
-<h3>2. Extract skills<a class="headerlink" href="#extract-skills" title="Permalink to this heading">#</a></h3>
-<p>You can simply extract skills from a job advert or list of job adverts:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
+</pre>
+                      </div>
+                    </div>
+                  </section>
+                  <section id="extract-skills">
+                    <h3>
+                      2. Extract skills<a
+                        class="headerlink"
+                        href="#extract-skills"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      You can simply extract skills from a job advert or list of
+                      job adverts:
+                    </p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
 
 <span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;extract_skills_toy&quot;</span><span class="p">,</span> <span class="n">local</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1">#instantiate with toy taxonomy configuration file</span>
 
@@ -280,19 +571,43 @@ <h3>2. Extract skills<a class="headerlink" href="#extract-skills" title="Permali
 <span class="p">]</span> <span class="c1">#toy job advert examples</span>
 
 <span class="n">predicted_skills</span> <span class="o">=</span> <span class="n">es</span><span class="o">.</span><span class="n">get_skills</span><span class="p">(</span><span class="n">job_adverts</span><span class="p">)</span> <span class="c1">#extract skills from list of job adverts</span>
-</pre></div>
-</div>
-<p>The outputs are as follows:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">predicted_skills</span>
+</pre>
+                      </div>
+                    </div>
+                    <p>The outputs are as follows:</p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="n">predicted_skills</span>
 <span class="p">[{</span><span class="s1">&#39;EXPERIENCE&#39;</span><span class="p">:</span> <span class="p">[],</span> <span class="s1">&#39;SKILL&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;communication skills&#39;</span><span class="p">,</span> <span class="s1">&#39;maths skills&#39;</span><span class="p">],</span> <span class="s1">&#39;MULTISKILL&#39;</span><span class="p">:</span> <span class="p">[]},</span> <span class="p">{</span><span class="s1">&#39;EXPERIENCE&#39;</span><span class="p">:</span> <span class="p">[],</span> <span class="s1">&#39;SKILL&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Excel skills&#39;</span><span class="p">,</span> <span class="s1">&#39;presentation skills&#39;</span><span class="p">],</span> <span class="s1">&#39;MULTISKILL&#39;</span><span class="p">:</span> <span class="p">[]}]</span>
 
-</pre></div>
-</div>
-</div>
-<div class="section" id="map-skills">
-<h3>3. Map skills<a class="headerlink" href="#map-skills" title="Permalink to this heading">#</a></h3>
-<p>You can map either the <code class="docutils literal notranslate"><span class="pre">predicted_skills</span></code> output from <code class="docutils literal notranslate"><span class="pre">get_stills</span></code> or simply map a list of skills to a taxonomy of your choice. In this instance, we map a list of skills:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
+</pre>
+                      </div>
+                    </div>
+                  </section>
+                  <section id="map-skills">
+                    <h3>
+                      3. Map skills<a
+                        class="headerlink"
+                        href="#map-skills"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      You can map either the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">predicted_skills</span></code
+                      >
+                      output from
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">get_skills</span></code
+                      >
+                      or simply map a list of skills to a taxonomy of your
+                      choice. In this instance, we map a list of skills:
+                    </p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
 
 <span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;extract_skills_toy&quot;</span><span class="p">,</span> <span class="n">local</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="c1">#instantiate with toy taxonomy configuration file</span>
 
@@ -305,180 +620,445 @@ <h3>3. Map skills<a class="headerlink" href="#map-skills" title="Permalink to th
 <span class="p">]</span> <span class="c1">#list of skills (and/or multiskills) to be matched</span>
 
 <span class="n">skills_list_matched</span> <span class="o">=</span> <span class="n">es</span><span class="o">.</span><span class="n">map_skills</span><span class="p">(</span><span class="n">skills_list</span><span class="p">)</span> <span class="c1">#match formatted skills to toy taxonomy</span>
-</pre></div>
-</div>
-<p>The outputs are as follows:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">skills_list_matched</span>
+</pre>
+                      </div>
+                    </div>
+                    <p>The outputs are as follows:</p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="n">skills_list_matched</span>
 <span class="o">&gt;&gt;&gt;</span> <span class="p">[{</span><span class="s1">&#39;SKILL&#39;</span><span class="p">:</span> <span class="p">[(</span><span class="s1">&#39;Excel skills&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;working with computers&#39;</span><span class="p">,</span> <span class="s1">&#39;S5&#39;</span><span class="p">)),</span> <span class="p">(</span><span class="s1">&#39;Communication&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;use communication techniques&#39;</span><span class="p">,</span> <span class="s1">&#39;cdef&#39;</span><span class="p">)),</span> <span class="p">(</span><span class="s1">&#39;working with computers&#39;</span><span class="p">,</span> <span class="p">(</span><span class="s1">&#39;communication, collaboration and creativity&#39;</span><span class="p">,</span> <span class="s1">&#39;S1&#39;</span><span class="p">))]}]</span>
-</pre></div>
-</div>
-</div>
-</div>
-<div class="section" id="app">
-<h2>App<a class="headerlink" href="#app" title="Permalink to this heading">#</a></h2>
-<p>If you would like to demo the library using a front end, we have also <a class="reference external" href="https://www.nesta.org.uk/data-visualisation-and-interactive/skills-extractor-tool/">built a streamlit app</a> that allows you to extract skills for a given text. The app allows you to paste a job advert of your choice, extract and map skills onto any of the configurations: <code class="docutils literal notranslate"><span class="pre">extract_skills_lightcast</span></code> and <code class="docutils literal notranslate"><span class="pre">extract_skills_esco</span></code>.</p>
-<p><img alt="nesta_esco" src="https://user-images.githubusercontent.com/46863334/221819442-70829216-b763-4717-b802-2f8836ad0874.gif" /></p>
-</div>
-<div class="section" id="development-a-name-development-a">
-<h2>Development <a name="development"></a><a class="headerlink" href="#development-a-name-development-a" title="Permalink to this heading">#</a></h2>
-<p>If you’d like to modify or develop the source code you can clone it by first running:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">git</span> <span class="n">clone</span> <span class="n">git</span><span class="nd">@github</span><span class="o">.</span><span class="n">com</span><span class="p">:</span><span class="n">nestauk</span><span class="o">/</span><span class="n">ojd_daps_skills</span><span class="o">.</span><span class="n">git</span>
-</pre></div>
-</div>
-<div class="section" id="setup">
-<h3>Setup<a class="headerlink" href="#setup" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Meet the data science cookiecutter <a class="reference external" href="http://nestauk.github.io/ds-cookiecutter/quickstart">requirements</a>, in brief:</p>
-<ul>
-<li><p>Install: <code class="docutils literal notranslate"><span class="pre">direnv</span></code> and <code class="docutils literal notranslate"><span class="pre">conda</span></code></p></li>
-</ul>
-</li>
-<li><p>Create a blank cookiecutter conda log file:</p>
-<ul>
-<li><p><code class="docutils literal notranslate"><span class="pre">mkdir</span> <span class="pre">.cookiecutter/state</span></code></p></li>
-<li><p><code class="docutils literal notranslate"><span class="pre">touch</span> <span class="pre">.cookiecutter/state/conda-create.log</span></code></p></li>
-</ul>
-</li>
-<li><p>Run <code class="docutils literal notranslate"><span class="pre">make</span> <span class="pre">install</span></code> to configure the development environment</p></li>
-<li><p>Download spacy model:</p>
-<ul>
-<li><p><code class="docutils literal notranslate"><span class="pre">python</span> <span class="pre">-m</span> <span class="pre">spacy</span> <span class="pre">download</span> <span class="pre">en_core_web_sm</span></code></p></li>
-</ul>
-</li>
-</ul>
-<p>If you don’t have the AWS CLI installed - you can download a zipped folder of the data <a class="reference external" href="https://open-jobs-indicators.s3.eu-west-1.amazonaws.com/escoe_extension/ojd_daps_skills_data.zip">by clicking here</a>. After downloading and unzipping, it is important that this folder is moved to the project’s parent folder - i.e. <code class="docutils literal notranslate"><span class="pre">ojd_daps_skills/</span></code>.</p>
-</div>
-<div class="section" id="project-structure">
-<h3>Project structure<a class="headerlink" href="#project-structure" title="Permalink to this heading">#</a></h3>
-<p>The project is split into three core pipeline folders:</p>
-<ul class="simple">
-<li><p><a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner">skill_ner</a> - Training a Named Entity Recognition (NER) model to extract skills from job adverts.</p></li>
-<li><p><a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner_mapping">skill_ner_mapping</a> - Matching skills to an existing skills taxonomy using semantic similarity.</p></li>
-<li><p><a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/extract_skills">extract_skills</a> - User friendly functionality to extract and map skills from job adverts.</p></li>
-</ul>
-<p>Much more about these steps can be found in each of the pipeline folder READMEs.</p>
-<p><img alt="" src="_images/overview.png" />
-<img alt="" src="_images/overview_example.png" />
-<em>An example of extracting skills and mapping them to the ESCO taxonomy.</em></p>
-</div>
-<div class="section" id="testing">
-<h3>Testing<a class="headerlink" href="#testing" title="Permalink to this heading">#</a></h3>
-<p>Some functions have tests, these can be checked by running</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pytest</span>
-</pre></div>
-</div>
-</div>
-<div class="section" id="analysis">
-<h3>Analysis<a class="headerlink" href="#analysis" title="Permalink to this heading">#</a></h3>
-<p>Various pieces of analysis are done in the <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/analysis/">analysis folder</a>. These require access to various datasets from Nesta’s private S3 bucket and are therefore only designed for internal Nesta use.</p>
-</div>
-<div class="section" id="contributor-guidelines">
-<h3>Contributor guidelines<a class="headerlink" href="#contributor-guidelines" title="Permalink to this heading">#</a></h3>
-<p>The technical and working style guidelines can be found <a class="reference external" href="https://github.com/nestauk/ds-cookiecutter/blob/master/GUIDELINES.md">here</a>.</p>
-<hr class="docutils" />
-<p><small><p>This project was made possible via funding from the <a target="_blank" href="https://www.escoe.ac.uk/">Economic Statistics Centre of Excellence</a></p></small></p>
-<p><small><p>Project template is based on <a target="_blank" href="https://github.com/nestauk/ds-cookiecutter">Nesta’s data science project template</a>
-(<a href="http://nestauk.github.io/ds-cookiecutter">Read the docs here</a>).
-</small></p>
-</div>
-</div>
-</div>
+</pre>
+                      </div>
+                    </div>
+                  </section>
+                </section>
+                <section id="app">
+                  <h2>
+                    App<a
+                      class="headerlink"
+                      href="#app"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    If you would like to demo the library using a front end, we
+                    have also
+                    <a
+                      class="reference external"
+                      href="https://www.nesta.org.uk/data-visualisation-and-interactive/skills-extractor-tool/"
+                      >built a streamlit app</a
+                    >
+                    that allows you to extract skills for a given text. The app
+                    allows you to paste a job advert of your choice, extract and
+                    map skills onto any of the configurations:
+                    <code class="docutils literal notranslate"
+                      ><span class="pre">extract_skills_lightcast</span></code
+                    >
+                    and
+                    <code class="docutils literal notranslate"
+                      ><span class="pre">extract_skills_esco</span></code
+                    >.
+                  </p>
+                  <p>
+                    <img
+                      alt="nesta_esco"
+                      src="https://user-images.githubusercontent.com/46863334/221819442-70829216-b763-4717-b802-2f8836ad0874.gif"
+                    />
+                  </p>
+                </section>
+                <section id="development">
+                  <h2>
+                    Development <a name="development"></a
+                    ><a
+                      class="headerlink"
+                      href="#development"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    If you’d like to modify or develop the source code you can
+                    clone it by first running:
+                  </p>
+                  <div class="highlight-default notranslate">
+                    <div class="highlight">
+                      <pre><span></span><span class="n">git</span> <span class="n">clone</span> <span class="n">git</span><span class="nd">@github</span><span class="o">.</span><span class="n">com</span><span class="p">:</span><span class="n">nestauk</span><span class="o">/</span><span class="n">ojd_daps_skills</span><span class="o">.</span><span class="n">git</span>
+</pre>
+                    </div>
+                  </div>
+                  <section id="setup">
+                    <h3>
+                      Setup<a
+                        class="headerlink"
+                        href="#setup"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Meet the data science cookiecutter
+                          <a
+                            class="reference external"
+                            href="http://nestauk.github.io/ds-cookiecutter/quickstart"
+                            >requirements</a
+                          >, in brief:
+                        </p>
+                        <ul>
+                          <li>
+                            <p>
+                              Install:
+                              <code class="docutils literal notranslate"
+                                ><span class="pre">direnv</span></code
+                              >
+                              and
+                              <code class="docutils literal notranslate"
+                                ><span class="pre">conda</span></code
+                              >
+                            </p>
+                          </li>
+                        </ul>
+                      </li>
+                      <li>
+                        <p>Create a blank cookiecutter conda log file:</p>
+                        <ul>
+                          <li>
+                            <p>
+                              <code class="docutils literal notranslate"
+                                ><span class="pre">mkdir</span>
+                                <span class="pre"
+                                  >.cookiecutter/state</span
+                                ></code
+                              >
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <code class="docutils literal notranslate"
+                                ><span class="pre">touch</span>
+                                <span class="pre"
+                                  >.cookiecutter/state/conda-create.log</span
+                                ></code
+                              >
+                            </p>
+                          </li>
+                        </ul>
+                      </li>
+                      <li>
+                        <p>
+                          Run
+                          <code class="docutils literal notranslate"
+                            ><span class="pre">make</span>
+                            <span class="pre">install</span></code
+                          >
+                          to configure the development environment
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section id="project-structure">
+                    <h3>
+                      Project structure<a
+                        class="headerlink"
+                        href="#project-structure"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      The project is split into three core pipeline folders:
+                    </p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          <a
+                            class="reference external"
+                            href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner"
+                            >skill_ner</a
+                          >
+                          - Training a Named Entity Recognition (NER) model to
+                          extract skills from job adverts.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          <a
+                            class="reference external"
+                            href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner_mapping"
+                            >skill_ner_mapping</a
+                          >
+                          - Matching skills to an existing skills taxonomy using
+                          semantic similarity.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          <a
+                            class="reference external"
+                            href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/extract_skills"
+                            >extract_skills</a
+                          >
+                          - User friendly functionality to extract and map
+                          skills from job adverts.
+                        </p>
+                      </li>
+                    </ul>
+                    <p>
+                      Much more about these steps can be found in each of the
+                      pipeline folder READMEs.
+                    </p>
+                    <p>
+                      <img alt="" src="_images/overview.png" />
+                      <img alt="" src="_images/overview_example.png" />
+                      <em
+                        >An example of extracting skills and mapping them to the
+                        ESCO taxonomy.</em
+                      >
+                    </p>
+                  </section>
+                  <section id="testing">
+                    <h3>
+                      Testing<a
+                        class="headerlink"
+                        href="#testing"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      Some functions have tests; these can be checked by running:
+                    </p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="n">pytest</span>
+</pre>
+                      </div>
+                    </div>
+                  </section>
+                  <section id="analysis">
+                    <h3>
+                      Analysis<a
+                        class="headerlink"
+                        href="#analysis"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      Various pieces of analysis are done in the
+                      <a
+                        class="reference external"
+                        href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/analysis/"
+                        >analysis folder</a
+                      >. These require access to various datasets from Nesta’s
+                      private S3 bucket and are therefore only designed for
+                      internal Nesta use.
+                    </p>
+                  </section>
+                  <section id="contributor-guidelines">
+                    <h3>
+                      Contributor guidelines<a
+                        class="headerlink"
+                        href="#contributor-guidelines"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      The technical and working style guidelines can be found
+                      <a
+                        class="reference external"
+                        href="https://github.com/nestauk/ds-cookiecutter/blob/master/GUIDELINES.md"
+                        >here</a
+                      >.
+                    </p>
+                    <p>
+                      If contributing, changes will need to be pushed to a new
+                      branch in order for our code checks to be triggered.
+                    </p>
+                    <hr class="docutils" />
+                    <p>
+                      <small
+                        ><p>
+                          This project was made possible via funding from the
+                          <a target="_blank" href="https://www.escoe.ac.uk/"
+                            >Economic Statistics Centre of Excellence</a
+                          >
+                        </p></small
+                      >
+                    </p>
+                    <p>
+                      <small
+                        ><p>
+                          Project template is based on
+                          <a
+                            target="_blank"
+                            href="https://github.com/nestauk/ds-cookiecutter"
+                            >Nesta’s data science project template</a
+                          >
+                          (<a href="http://nestauk.github.io/ds-cookiecutter"
+                            >Read the docs here</a
+                          >).
+                        </p></small
+                      >
+                    </p>
+                  </section>
+                </section>
+              </section>
+            </article>
+          </div>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="custom_usage.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">Custom Usage</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="index.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="custom_usage.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
+                  <div class="title">Home</div>
                 </div>
-                <div class="title">Custom Usage</div>
-              </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-            </a>
-          <a class="prev-page" href="index.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
                 </div>
-                
-                <div class="title">Home</div>
-                
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
               </div>
-            </a>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
-          </div>
-          <div class="right-details">
-            <div class="icons">
-              
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#">Skills Extractor</a>
+                    <ul>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#welcome-to-nesta-s-skills-extractor-library"
+                          >Welcome to Nesta’s Skills Extractor Library</a
+                        >
+                      </li>
+                      <li>
+                        <a class="reference internal" href="#installation"
+                          >Installation <a name="installation"></a
+                        ></a>
+                      </li>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#tl-dr-using-nesta-s-skills-extractor-library"
+                          >TL;DR: Using Nesta’s Skills Extractor library
+                          <a name="usage"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#extract-and-map-skills"
+                              >1. Extract AND map skills</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#extract-skills"
+                              >2. Extract skills</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#map-skills"
+                              >3. Map skills</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                      <li><a class="reference internal" href="#app">App</a></li>
+                      <li>
+                        <a class="reference internal" href="#development"
+                          >Development <a name="development"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a class="reference internal" href="#setup"
+                              >Setup</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#project-structure"
+                              >Project structure</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#testing"
+                              >Testing</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#analysis"
+                              >Analysis</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#contributor-guidelines"
+                              >Contributor guidelines</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
             </div>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">Skills Extractor</a><ul>
-<li><a class="reference internal" href="#welcome-to-nesta-s-skills-extractor-library">Welcome to Nesta’s Skills Extractor Library</a></li>
-<li><a class="reference internal" href="#installation-a-name-installation-a">Installation <a name="installation"></a></a><ul>
-<li><a class="reference internal" href="#aws-cli">AWS CLI</a></li>
-</ul>
-</li>
-<li><a class="reference internal" href="#tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a">TL;DR: Using Nesta’s Skills Extractor library <a name="usage"></a></a><ul>
-<li><a class="reference internal" href="#extract-and-map-skills">1. Extract AND map skills</a></li>
-<li><a class="reference internal" href="#extract-skills">2. Extract skills</a></li>
-<li><a class="reference internal" href="#map-skills">3. Map skills</a></li>
-</ul>
-</li>
-<li><a class="reference internal" href="#app">App</a></li>
-<li><a class="reference internal" href="#development-a-name-development-a">Development <a name="development"></a></a><ul>
-<li><a class="reference internal" href="#setup">Setup</a></li>
-<li><a class="reference internal" href="#project-structure">Project structure</a></li>
-<li><a class="reference internal" href="#testing">Testing</a></li>
-<li><a class="reference internal" href="#analysis">Analysis</a></li>
-<li><a class="reference internal" href="#contributor-guidelines">Contributor guidelines</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-
-          </div>
-        </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/custom_usage.html b/docs/build/html/custom_usage.html
index dbb3a4fe..f1e48ec0 100644
--- a/docs/build/html/custom_usage.html
+++ b/docs/build/html/custom_usage.html
@@ -1,443 +1,1248 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Pipeline summary and metrics" href="pipeline_summary.html" /><link rel="prev" title="Skills Extractor" href="about.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link
+      rel="next"
+      title="Pipeline summary and metrics"
+      href="pipeline_summary.html"
+    />
+    <link rel="prev" title="Skills Extractor" href="about.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Custom Usage - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>Custom Usage - Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
-
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
-
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
 
-</div>
-</div>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="custom-usage">
-<h1>Custom Usage<a class="headerlink" href="#custom-usage" title="Permalink to this heading">#</a></h1>
-<p><code class="docutils literal notranslate"><span class="pre">extract_skills.py</span></code> combines the prediction of skills using code from <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner">skill_ner</a> with the mapping of skills to a taxonomy using code from <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner_mapping">skill_ner_mapping</a>.</p>
-<p>This page explains more about the custom usage of this class including creating a custom config file and mapping to another taxonomy. To do this you will need to clone the repo. Please refer to the main documentation page for the <a class="reference external" href="https://nestauk.github.io/ojd_daps_skills/build/html/about.html#development-a-name-development-a">development setup instructions</a> for this package and the core usage.</p>
-<div class="section" id="configuration-files-a-name-config-files-a">
-<h2>Configuration files <a name="config_files"></a><a class="headerlink" href="#configuration-files-a-name-config-files-a" title="Permalink to this heading">#</a></h2>
-<p>Core to the Extract Skills package, and in particular the taxonomy mapping functionality, is config files. These are included in the instantiation of the class, as so:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;extract_skills_toy&quot;</span><span class="p">)</span>
-</pre></div>
-</div>
-<div class="section" id="predefined-configurations-a-name-predefined-config-a">
-<h3>Predefined configurations <a name="predefined_config"></a><a class="headerlink" href="#predefined-configurations-a-name-predefined-config-a" title="Permalink to this heading">#</a></h3>
-<p>There are currently three configurations available for running the skills extraction algorithm. These configurations contain information about parameter values, trained models and directory locations of stored data.</p>
-<ol class="arabic simple">
-<li><p><code class="docutils literal notranslate"><span class="pre">extract_skills_toy</span></code> - Configuration for a toy taxonomy example, useful for testing.</p></li>
-<li><p><code class="docutils literal notranslate"><span class="pre">extract_skills_esco</span></code> - Configuration for extracting skills and matching them to the ESCO skills taxonomy. This configuration is correct to v1.1.1 of ESCO.</p></li>
-<li><p><code class="docutils literal notranslate"><span class="pre">extract_skills_lightcast</span></code> - Configuration for extracting skills and matching them to the Lightcast skills taxonomy. This configuration is correct to the version of Lightcast as of 22/11/22.</p></li>
-</ol>
-<p>If you are mapping to the ESCO skills taxonomy using <code class="docutils literal notranslate"><span class="pre">extract_skills_esco.yaml</span></code>, we reviewed the top 100 skills and ultimately hard coded 43 of the most common skills which were not well matched from a random sample of 100,000 job adverts in the <a class="reference external" href="https://www.nesta.org.uk/data-visualisation-and-interactive/open-jobs-observatory/">Open Jobs Observatory</a> project with the most appropriate skills from the taxonomy.</p>
-</div>
-<div class="section" id="configuration-definitions-a-name-config-defs-a">
-<h3>Configuration definitions <a name="config_defs"></a><a class="headerlink" href="#configuration-definitions-a-name-config-defs-a" title="Permalink to this heading">#</a></h3>
-<p>Every predefined configuration includes the following parameters:</p>
-<div class="table-wrapper colwidths-auto docutils container">
-<table class="colwidths-auto docutils align-default">
-<thead>
-<tr class="row-odd"><th class="head"><p>Parameter</p></th>
-<th class="head"><p>Description</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">ner_model_path</span></code>: str</p></td>
-<td><p>The relative path to the NER model folder used to predict skill spans in job adverts.</p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">taxonomy_name</span></code>: str</p></td>
-<td><p>The name of the taxonomy to map onto.</p></td>
-</tr>
-<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">taxonomy_path</span></code>: str</p></td>
-<td><p>The relative path to the formatted taxonomy. Formatted taxonomy must be in <code class="docutils literal notranslate"><span class="pre">.csv</span></code> format.</p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">clean_job_ads</span></code>: bool, default=True</p></td>
-<td><p>Whether to perform light text cleaning on job adverts or not. Text cleaning includes detecting and splitting camelcase in job adverts, replacing various characters and converting bullet points to full stops. Defaults to True.</p></td>
-</tr>
-<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">min_multiskill_length</span></code>: int</p></td>
-<td><p>The minimum character length a predicted multi-skill sentence must be to apply splitting rules to.</p></td>
-</tr>
-<tr class="row-odd"><td><p>(optional) <code class="docutils literal notranslate"><span class="pre">taxonomy_embedding_file_name</span></code>: str</p></td>
-<td><p>The relative path to a taxonomy embedding file if it exists. If left unset the embeddings will be generated when the code is run.</p></td>
-</tr>
-<tr class="row-even"><td><p>(optional) <code class="docutils literal notranslate"><span class="pre">prev_skill_matches_file_name</span></code>: str</p></td>
-<td><p>The relative path to a previous skill matches file if it exists.</p></td>
-</tr>
-<tr class="row-odd"><td><p>(optional) <code class="docutils literal notranslate"><span class="pre">hard_labelled_skills_file_name</span></code>: str</p></td>
-<td><p>The relative path to a hard labelled skills file if it exists.</p></td>
-</tr>
-<tr class="row-even"><td><p>(optional) <code class="docutils literal notranslate"><span class="pre">hier_name_mapper_file_name</span></code>: str</p></td>
-<td><p>The relative path to a hierarchy name mapper file if it exists.</p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">num_hier_levels</span></code>: int</p></td>
-<td><p>The number of levels in the skills taxonomy hierarchy. This can be set to 0 if the taxonomy has no levels.</p></td>
-</tr>
-<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">skill_type_dict</span></code>: dict</p></td>
-<td><p>A dictionary that defines skill types and hierarchy types. <br /><br /> <code class="docutils literal notranslate"><span class="pre">{</span> <span class="pre">&quot;skill_types&quot;:</span> <span class="pre">[A</span> <span class="pre">list</span> <span class="pre">of</span> <span class="pre">the</span> <span class="pre">values</span> <span class="pre">of</span> <span class="pre">the</span> <span class="pre">'type'</span> <span class="pre">column</span> <span class="pre">which</span> <span class="pre">code</span> <span class="pre">skills],</span> <span class="pre">&quot;hier_types&quot;:</span> <span class="pre">[A</span> <span class="pre">list</span> <span class="pre">of</span> <span class="pre">the</span> <span class="pre">values</span> <span class="pre">of</span> <span class="pre">the</span> <span class="pre">'type'</span> <span class="pre">column</span> <span class="pre">which</span> <span class="pre">code</span> <span class="pre">skill</span> <span class="pre">groups,</span> <span class="pre">these</span> <span class="pre">need</span> <span class="pre">to</span> <span class="pre">be</span> <span class="pre">in</span> <span class="pre">order</span> <span class="pre">from</span> <span class="pre">least</span> <span class="pre">to</span> <span class="pre">most</span> <span class="pre">granular]}</span></code></p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">match_thresholds_dict</span></code>: dict</p></td>
-<td><p>A dictionary that defines thresholds at each level of the skills taxonomy hierarchy. For example,<br /> <br /> <code class="docutils literal notranslate"><span class="pre">{&quot;skill_match_thresh&quot;:</span> <span class="pre">0.7,</span> <span class="pre">&quot;top_tax_skills&quot;:</span> <span class="pre">{1:</span> <span class="pre">0.5,</span> <span class="pre">2:</span> <span class="pre">0.5,</span> <span class="pre">3:</span> <span class="pre">0.5},“max_share”:</span> <span class="pre">{1:</span> <span class="pre">0,</span> <span class="pre">2:</span> <span class="pre">0.2,</span> <span class="pre">3:</span> <span class="pre">0.2}}</span></code><br /> <br /> See <strong>Model Card: Skills to Taxonomy Mapping</strong> for the details of what these thresholds represent.</p></td>
-</tr>
-<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">skill_name_col</span></code>: str</p></td>
-<td><p>The name of the skill/hierarchy level description text column in formatted taxonomy <code class="docutils literal notranslate"><span class="pre">.csv</span></code>.</p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">skill_id_col</span></code>: str</p></td>
-<td><p>Name of skill id column in formatted taxonomy <code class="docutils literal notranslate"><span class="pre">.csv</span></code>. Each row should contain a unique ID for the skill/hierarchy.</p></td>
-</tr>
-<tr class="row-even"><td><p>(optional) <code class="docutils literal notranslate"><span class="pre">skill_hier_info_col</span></code>: str</p></td>
-<td><p>Name of hierarchy info column in formatted taxonomy <code class="docutils literal notranslate"><span class="pre">.csv</span></code>. The hierarchy info column contains which hierarchy levels a skill is in (from least to most granular). If not a skill, then NA.</p></td>
-</tr>
-<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">skill_type_col</span></code>: str</p></td>
-<td><p>Name of what column name the skill/hier description is from (category, subcategory) in formatted taxonomy <code class="docutils literal notranslate"><span class="pre">.csv</span></code>.</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-</div>
-</div>
-<div class="section" id="mapping-to-your-own-taxonomy-a-name-mapping-a">
-<h2>Mapping to your own taxonomy <a name="mapping"></a><a class="headerlink" href="#mapping-to-your-own-taxonomy-a-name-mapping-a" title="Permalink to this heading">#</a></h2>
-<p>Although we currently support three configurations for running the skills extraction algorithm, you are also able to map extracted skills onto a taxonomy of your choice by defining your own configuration file. In order to map skills onto your own taxonomy you must:</p>
-<ol class="arabic simple">
-<li><p>Format your taxonomy</p></li>
-<li><p>Define your own configuration file</p></li>
-</ol>
-<div class="section" id="format-your-taxonomy-a-name-format-tax-a">
-<h3>Format your taxonomy <a name="format_tax"></a><a class="headerlink" href="#format-your-taxonomy-a-name-format-tax-a" title="Permalink to this heading">#</a></h3>
-<p>You must also format your taxonomy in such a way that looks like the following:</p>
-<div class="table-wrapper colwidths-auto docutils container">
-<table class="colwidths-auto docutils align-default">
-<thead>
-<tr class="row-odd"><th class="head"><p>skill_type_col</p></th>
-<th class="head"><p>skill_name_col</p></th>
-<th class="head"><p>skill_id_col</p></th>
-<th class="head"><p>(optional) skill_hier_info_col</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>skill</p></td>
-<td><p>use spreadsheets software</p></td>
-<td><p>abcd</p></td>
-<td><p><code class="docutils literal notranslate"><span class="pre">[[&quot;S&quot;,</span> <span class="pre">&quot;S5&quot;,</span> <span class="pre">&quot;S5.6&quot;,</span> <span class="pre">&quot;S5.6.1&quot;],</span> <span class="pre">[&quot;S&quot;,</span> <span class="pre">&quot;S5&quot;,</span> <span class="pre">&quot;S5.5&quot;,</span> <span class="pre">&quot;S5.5.2&quot;]]</span></code></p></td>
-</tr>
-<tr class="row-odd"><td><p>skill</p></td>
-<td><p>use communication techniques</p></td>
-<td><p>cdef</p></td>
-<td><p><code class="docutils literal notranslate"><span class="pre">[[&quot;S&quot;,</span> <span class="pre">&quot;S1&quot;,</span> <span class="pre">&quot;S1.0&quot;,</span> <span class="pre">&quot;S1.0.0&quot;]]</span></code></p></td>
-</tr>
-<tr class="row-even"><td><p>skill_group_3</p></td>
-<td><p>communication, collaboration and creativity</p></td>
-<td><p>S1.0.0</p></td>
-<td><p>NaN</p></td>
-</tr>
-<tr class="row-odd"><td><p>skill_group_3</p></td>
-<td><p>mathematics</p></td>
-<td><p>S1.2.1</p></td>
-<td><p>NaN</p></td>
-</tr>
-<tr class="row-even"><td><p>skill_group_2</p></td>
-<td><p>presenting information</p></td>
-<td><p>S1.4</p></td>
-<td><p>NaN</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-<p>You will see the <code class="docutils literal notranslate"><span class="pre">skill_type_col</span></code> column contains skills and skill groups. This is because we try to match to individual skills, but if this isn’t possible we then try to match to a skill group in the taxonomy (if given).</p>
-<p>For rows which correspond to individual skills (rather than skill groups) the <code class="docutils literal notranslate"><span class="pre">skill_hier_info_col</span></code> column values show all the parts of the taxonomy where this skill is situated. It is helpful to link these codes to names, so you may also want to create a taxonomy name mapper file for this data, e.g. <code class="docutils literal notranslate"><span class="pre">{&quot;S1.2.1&quot;:</span> <span class="pre">&quot;mathematics&quot;}</span></code>. For rows which correspond to skill groups (rather than individual skills) the <code class="docutils literal notranslate"><span class="pre">skill_hier_info_col</span></code> column will be blank since the hierarchy information is contained in the <code class="docutils literal notranslate"><span class="pre">skill_id_col</span></code> column. The contents of <code class="docutils literal notranslate"><span class="pre">skill_hier_info_col</span></code> need to be a list of lists, or a list of strings, but not a combination of both.</p>
-<p>The number of levels in the taxonomy will correspond to the length of the lists in the <code class="docutils literal notranslate"><span class="pre">skill_hier_info_col</span></code> column.</p>
-<p>Although we don’t provide guidance on re-formatting your taxonomy, we have re-formatted the ESCO taxonomy to this format in <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/pipeline/skill_ner_mapping/esco_formatting.py">this script</a> and we have re-formatted the Lightcast taxonomy to this format in <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/pipeline/skill_ner_mapping/lightcast_formatting.py">this script</a>.</p>
-</div>
-<div class="section" id="define-your-own-configuration-file-a-name-custom-config-a">
-<h3>Define your own configuration file <a name="custom_config"></a><a class="headerlink" href="#define-your-own-configuration-file-a-name-custom-config-a" title="Permalink to this heading">#</a></h3>
-<p>Create your own configuration <code class="docutils literal notranslate"><span class="pre">yaml</span></code> file in the format <code class="docutils literal notranslate"><span class="pre">extract_skills_taxonomy_name.yaml</span></code>. This config should contain all the parameters as described in <a class="reference external" href="https://nestauk.github.io/ojd_daps_skills/build/html/custom_usage.html#configuration-definitions-a-name-config-defs-a">Predefined configuration definitions</a>. The file should be saved to <code class="docutils literal notranslate"><span class="pre">your_current_path/ojd_daps_skills/config/</span></code>.</p>
-<p>We provide a template config file <a class="reference external" href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/config/extract_skills_template.yaml">here</a>.</p>
-<p>It is important that the list given in <code class="docutils literal notranslate"><span class="pre">skill_type_dict['hier_types']</span></code> is in the order from the least to most granular parts of the taxonomy. For example, in the ESCO taxonomy we match against the second and third skill group levels, so this is set to <code class="docutils literal notranslate"><span class="pre">[&quot;level_2&quot;,</span> <span class="pre">&quot;level_3&quot;]</span></code> i.e. level 3 is more granular than level 2, where <code class="docutils literal notranslate"><span class="pre">level</span> <span class="pre">2</span> <span class="pre">skill</span> <span class="pre">groups</span> <span class="pre">&gt;</span> <span class="pre">level</span> <span class="pre">3</span> <span class="pre">skill</span> <span class="pre">groups</span> <span class="pre">&gt;</span> <span class="pre">individual</span> <span class="pre">skill</span></code>.</p>
-<p>Now you can use your custom taxonomy as:</p>
-<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
+              </div>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
+            </a>
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
+            </div>
+            <article role="main">
+              <section id="custom-usage">
+                <h1>
+                  Custom Usage<a
+                    class="headerlink"
+                    href="#custom-usage"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <p>
+                  <code class="docutils literal notranslate"
+                    ><span class="pre">extract_skills.py</span></code
+                  >
+                  combines the prediction of skills using code from
+                  <a
+                    class="reference external"
+                    href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner"
+                    >skill_ner</a
+                  >
+                  with the mapping of skills to a taxonomy using code from
+                  <a
+                    class="reference external"
+                    href="https://github.com/nestauk/ojd_daps_skills/tree/dev/ojd_daps_skills/pipeline/skill_ner_mapping"
+                    >skill_ner_mapping</a
+                  >.
+                </p>
+                <p>
+                  This page explains more about the custom usage of this class
+                  including creating a custom config file and mapping to another
+                  taxonomy. To do this you will need to clone the repo. Please
+                  refer to the main documentation page for the
+                  <a
+                    class="reference external"
+                    href="https://nestauk.github.io/ojd_daps_skills/build/html/about.html#development-a-name-development-a"
+                    >development setup instructions</a
+                  >
+                  for this package and the core usage.
+                </p>
+                <section id="configuration-files">
+                  <h2>
+                    Configuration files <a name="config_files"></a
+                    ><a
+                      class="headerlink"
+                      href="#configuration-files"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    Core to the Extract Skills package, and in particular the
+                    taxonomy mapping functionality, are config files. These are
+                    passed in when instantiating the class, like so:
+                  </p>
+                  <div class="highlight-default notranslate">
+                    <div class="highlight">
+                      <pre><span></span><span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;extract_skills_toy&quot;</span><span class="p">)</span>
+</pre>
+                    </div>
+                  </div>
+                  <section id="predefined-configurations">
+                    <h3>
+                      Predefined configurations <a name="predefined_config"></a
+                      ><a
+                        class="headerlink"
+                        href="#predefined-configurations"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      There are currently three configurations available for
+                      running the skills extraction algorithm. These
+                      configurations contain information about parameter values,
+                      trained models and directory locations of stored data.
+                    </p>
+                    <ol class="arabic simple">
+                      <li>
+                        <p>
+                          <code class="docutils literal notranslate"
+                            ><span class="pre">extract_skills_toy</span></code
+                          >
+                          - Configuration for a toy taxonomy example, useful for
+                          testing.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          <code class="docutils literal notranslate"
+                            ><span class="pre">extract_skills_esco</span></code
+                          >
+                          - Configuration for extracting skills and matching
+                          them to the ESCO skills taxonomy. This configuration
+                          is correct to v1.1.1 of ESCO.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          <code class="docutils literal notranslate"
+                            ><span class="pre"
+                              >extract_skills_lightcast</span
+                            ></code
+                          >
+                          - Configuration for extracting skills and matching
+                          them to the Lightcast skills taxonomy. This
+                          configuration is correct to the version of Lightcast
+                          as of 22/11/22.
+                        </p>
+                      </li>
+                    </ol>
+                    <p>
+                      If you are mapping to the ESCO skills taxonomy using
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">extract_skills_esco.yaml</span></code
+                      >, we reviewed the top 100 skills and ultimately hard
+                      coded 43 of the most common skills which were not well
+                      matched from a random sample of 100,000 job adverts in the
+                      <a
+                        class="reference external"
+                        href="https://www.nesta.org.uk/data-visualisation-and-interactive/open-jobs-observatory/"
+                        >Open Jobs Observatory</a
+                      >
+                      project with the most appropriate skills from the
+                      taxonomy.
+                    </p>
+                  </section>
+                  <section id="configuration-definitions">
+                    <h3>
+                      Configuration definitions <a name="config_defs"></a
+                      ><a
+                        class="headerlink"
+                        href="#configuration-definitions"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      Every predefined configuration includes the following
+                      parameters:
+                    </p>
+                    <div
+                      class="table-wrapper colwidths-auto docutils container"
+                    >
+                      <table class="docutils align-default">
+                        <thead>
+                          <tr class="row-odd">
+                            <th class="head"><p>Parameter</p></th>
+                            <th class="head"><p>Description</p></th>
+                          </tr>
+                        </thead>
+                        <tbody>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">ner_model_path</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to the NER model folder used
+                                to predict skill spans in job adverts.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">taxonomy_name</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>The name of the taxonomy to map onto.</p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">taxonomy_path</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to the formatted taxonomy.
+                                Formatted taxonomy must be in
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">.csv</span></code
+                                >
+                                format.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">clean_job_ads</span></code
+                                >: bool, default=True
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                Whether to perform light text cleaning on job
+                                adverts or not. Text cleaning includes detecting
+                                and splitting camelcase in job adverts,
+                                replacing various characters and converting
+                                bullet points to full stops. Defaults to True.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >min_multiskill_length</span
+                                  ></code
+                                >: int
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The minimum character length a predicted
+                                multi-skill sentence must be to apply splitting
+                                rules to.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                (optional)
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >taxonomy_embedding_file_name</span
+                                  ></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to a taxonomy embedding file
+                                if it exists. If left unset the embeddings will
+                                be generated when the code is run.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                (optional)
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >prev_skill_matches_file_name</span
+                                  ></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to a previous skill matches
+                                file if it exists.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                (optional)
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >hard_labelled_skills_file_name</span
+                                  ></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to a hard labelled skills file
+                                if it exists.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                (optional)
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >hier_name_mapper_file_name</span
+                                  ></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The relative path to a hierarchy name mapper
+                                file if it exists.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >num_hier_levels</span
+                                  ></code
+                                >: int
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The number of levels in the skills taxonomy
+                                hierarchy. This can be set to 0 if the taxonomy
+                                has no levels.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >skill_type_dict</span
+                                  ></code
+                                >: dict
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                A dictionary that defines skill types and
+                                hierarchy types. <br /><br />
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">{</span>
+                                  <span class="pre"
+                                    >&quot;skill_types&quot;:</span
+                                  >
+                                  <span class="pre">[A</span>
+                                  <span class="pre">list</span>
+                                  <span class="pre">of</span>
+                                  <span class="pre">the</span>
+                                  <span class="pre">values</span>
+                                  <span class="pre">of</span>
+                                  <span class="pre">the</span>
+                                  <span class="pre">'type'</span>
+                                  <span class="pre">column</span>
+                                  <span class="pre">which</span>
+                                  <span class="pre">code</span>
+                                  <span class="pre">skills],</span>
+                                  <span class="pre"
+                                    >&quot;hier_types&quot;:</span
+                                  >
+                                  <span class="pre">[A</span>
+                                  <span class="pre">list</span>
+                                  <span class="pre">of</span>
+                                  <span class="pre">the</span>
+                                  <span class="pre">values</span>
+                                  <span class="pre">of</span>
+                                  <span class="pre">the</span>
+                                  <span class="pre">'type'</span>
+                                  <span class="pre">column</span>
+                                  <span class="pre">which</span>
+                                  <span class="pre">code</span>
+                                  <span class="pre">skill</span>
+                                  <span class="pre">groups,</span>
+                                  <span class="pre">these</span>
+                                  <span class="pre">need</span>
+                                  <span class="pre">to</span>
+                                  <span class="pre">be</span>
+                                  <span class="pre">in</span>
+                                  <span class="pre">order</span>
+                                  <span class="pre">from</span>
+                                  <span class="pre">least</span>
+                                  <span class="pre">to</span>
+                                  <span class="pre">most</span>
+                                  <span class="pre">granular]}</span></code
+                                >
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >match_thresholds_dict</span
+                                  ></code
+                                >: dict
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                A dictionary that defines thresholds at each
+                                level of the skills taxonomy hierarchy. For
+                                example,<br />
+                                <br />
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >{&quot;skill_match_thresh&quot;:</span
+                                  >
+                                  <span class="pre">0.7,</span>
+                                  <span class="pre"
+                                    >&quot;top_tax_skills&quot;:</span
+                                  >
+                                  <span class="pre">{1:</span>
+                                  <span class="pre">0.5,</span>
+                                  <span class="pre">2:</span>
+                                  <span class="pre">0.5,</span>
+                                  <span class="pre">3:</span>
+                                  <span class="pre">0.5},</span> <span class="pre">&quot;max_share&quot;:</span>
+                                  <span class="pre">{1:</span>
+                                  <span class="pre">0,</span>
+                                  <span class="pre">2:</span>
+                                  <span class="pre">0.2,</span>
+                                  <span class="pre">3:</span>
+                                  <span class="pre">0.2}}</span></code
+                                ><br />
+                                <br />
+                                See
+                                <strong
+                                  >Model Card: Skills to Taxonomy
+                                  Mapping</strong
+                                >
+                                for the details of what these thresholds
+                                represent.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">skill_name_col</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                The name of the skill/hierarchy level
+                                description text column in formatted taxonomy
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">.csv</span></code
+                                >.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">skill_id_col</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                Name of skill id column in formatted taxonomy
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">.csv</span></code
+                                >. Each row should contain a unique ID for the
+                                skill/hierarchy.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td>
+                              <p>
+                                (optional)
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre"
+                                    >skill_hier_info_col</span
+                                  ></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                Name of hierarchy info column in formatted
+                                taxonomy
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">.csv</span></code
+                                >. The hierarchy info column contains which
+                                hierarchy levels a skill is in (from least to
+                                most granular). If not a skill, then NA.
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">skill_type_col</span></code
+                                >: str
+                              </p>
+                            </td>
+                            <td>
+                              <p>
+                                Name of the column that says where each
+                                skill/hier description comes from (e.g.
+                                category, subcategory) in formatted taxonomy
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">.csv</span></code
+                                >.
+                              </p>
+                            </td>
+                          </tr>
+                        </tbody>
+                      </table>
+                    </div>
+                  </section>
+                </section>
+                <section id="mapping-to-your-own-taxonomy">
+                  <h2>
+                    Mapping to your own taxonomy <a name="mapping"></a
+                    ><a
+                      class="headerlink"
+                      href="#mapping-to-your-own-taxonomy"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    Although we currently support three configurations for
+                    running the skills extraction algorithm, you are also able
+                    to map extracted skills onto a taxonomy of your choice by
+                    defining your own configuration file. In order to map skills
+                    onto your own taxonomy you must:
+                  </p>
+                  <ol class="arabic simple">
+                    <li><p>Format your taxonomy</p></li>
+                    <li><p>Define your own configuration file</p></li>
+                  </ol>
+                  <section id="format-your-taxonomy">
+                    <h3>
+                      Format your taxonomy <a name="format_tax"></a
+                      ><a
+                        class="headerlink"
+                        href="#format-your-taxonomy"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      You must format your taxonomy so that it looks like
+                      the following:
+                    </p>
+                    <div
+                      class="table-wrapper colwidths-auto docutils container"
+                    >
+                      <table class="docutils align-default">
+                        <thead>
+                          <tr class="row-odd">
+                            <th class="head"><p>skill_type_col</p></th>
+                            <th class="head"><p>skill_name_col</p></th>
+                            <th class="head"><p>skill_id_col</p></th>
+                            <th class="head">
+                              <p>(optional) skill_hier_info_col</p>
+                            </th>
+                          </tr>
+                        </thead>
+                        <tbody>
+                          <tr class="row-even">
+                            <td><p>skill</p></td>
+                            <td><p>use spreadsheets software</p></td>
+                            <td><p>abcd</p></td>
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">[[&quot;S&quot;,</span>
+                                  <span class="pre">&quot;S5&quot;,</span>
+                                  <span class="pre">&quot;S5.6&quot;,</span>
+                                  <span class="pre">&quot;S5.6.1&quot;],</span>
+                                  <span class="pre">[&quot;S&quot;,</span>
+                                  <span class="pre">&quot;S5&quot;,</span>
+                                  <span class="pre">&quot;S5.5&quot;,</span>
+                                  <span class="pre"
+                                    >&quot;S5.5.2&quot;]]</span
+                                  ></code
+                                >
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>skill</p></td>
+                            <td><p>use communication techniques</p></td>
+                            <td><p>cdef</p></td>
+                            <td>
+                              <p>
+                                <code class="docutils literal notranslate"
+                                  ><span class="pre">[[&quot;S&quot;,</span>
+                                  <span class="pre">&quot;S1&quot;,</span>
+                                  <span class="pre">&quot;S1.0&quot;,</span>
+                                  <span class="pre"
+                                    >&quot;S1.0.0&quot;]]</span
+                                  ></code
+                                >
+                              </p>
+                            </td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>skill_group_3</p></td>
+                            <td>
+                              <p>communication, collaboration and creativity</p>
+                            </td>
+                            <td><p>S1.0.0</p></td>
+                            <td><p>NaN</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>skill_group_3</p></td>
+                            <td><p>mathematics</p></td>
+                            <td><p>S1.2.1</p></td>
+                            <td><p>NaN</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>skill_group_2</p></td>
+                            <td><p>presenting information</p></td>
+                            <td><p>S1.4</p></td>
+                            <td><p>NaN</p></td>
+                          </tr>
+                        </tbody>
+                      </table>
+                    </div>
+                    <p>
+                      You will see the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_type_col</span></code
+                      >
+                      column contains skills and skill groups. This is because
+                      we try to match to individual skills, but if this isn’t
+                      possible we then try to match to a skill group in the
+                      taxonomy (if given).
+                    </p>
+                    <p>
+                      For rows which correspond to individual skills (rather
+                      than skill groups) the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_hier_info_col</span></code
+                      >
+                      column values show all the parts of the taxonomy where
+                      this skill is situated. It is helpful to link these codes
+                      to names, so you may also want to create a taxonomy name
+                      mapper file for this data, e.g.
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">{&quot;S1.2.1&quot;:</span>
+                        <span class="pre">&quot;mathematics&quot;}</span></code
+                      >. For rows which correspond to skill groups (rather than
+                      individual skills) the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_hier_info_col</span></code
+                      >
+                      column will be blank since the hierarchy information is
+                      contained in the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_id_col</span></code
+                      >
+                      column. The contents of
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_hier_info_col</span></code
+                      >
+                      need to be a list of lists, or a list of strings, but not
+                      a combination of both.
+                    </p>
+                    <p>
+                      The number of levels in the taxonomy will correspond to
+                      the length of the lists in the
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">skill_hier_info_col</span></code
+                      >
+                      column.
+                    </p>
+                    <p>
+                      Although we don’t provide guidance on re-formatting your
+                      taxonomy, we have re-formatted the ESCO taxonomy to this
+                      format in
+                      <a
+                        class="reference external"
+                        href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/pipeline/skill_ner_mapping/esco_formatting.py"
+                        >this script</a
+                      >
+                      and we have re-formatted the Lightcast taxonomy to this
+                      format in
+                      <a
+                        class="reference external"
+                        href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/pipeline/skill_ner_mapping/lightcast_formatting.py"
+                        >this script</a
+                      >.
+                    </p>
+                  </section>
+                  <section id="define-your-own-configuration-file">
+                    <h3>
+                      Define your own configuration file
+                      <a name="custom_config"></a
+                      ><a
+                        class="headerlink"
+                        href="#define-your-own-configuration-file"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      Create your own configuration
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">yaml</span></code
+                      >
+                      file in the format
+                      <code class="docutils literal notranslate"
+                        ><span class="pre"
+                          >extract_skills_taxonomy_name.yaml</span
+                        ></code
+                      >. This config should contain all the parameters as
+                      described in
+                      <a
+                        class="reference external"
+                        href="https://nestauk.github.io/ojd_daps_skills/build/html/custom_usage.html#configuration-definitions"
+                        >Predefined configuration definitions</a
+                      >. The file should be saved to
+                      <code class="docutils literal notranslate"
+                        ><span class="pre"
+                          >your_current_path/ojd_daps_skills/config/</span
+                        ></code
+                      >.
+                    </p>
+                    <p>
+                      We provide a template config file
+                      <a
+                        class="reference external"
+                        href="https://github.com/nestauk/ojd_daps_skills/blob/dev/ojd_daps_skills/config/extract_skills_template.yaml"
+                        >here</a
+                      >.
+                    </p>
+                    <p>
+                      It is important that the list given in
+                      <code class="docutils literal notranslate"
+                        ><span class="pre"
+                          >skill_type_dict['hier_types']</span
+                        ></code
+                      >
+                      is in the order from the least to most granular parts of
+                      the taxonomy. For example, in the ESCO taxonomy we match
+                      against the second and third skill group levels, so this
+                      is set to
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">[&quot;level_2&quot;,</span>
+                        <span class="pre">&quot;level_3&quot;]</span></code
+                      >
+                      i.e. level 3 is more granular than level 2, where
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">level</span>
+                        <span class="pre">2</span>
+                        <span class="pre">skill</span>
+                        <span class="pre">groups</span>
+                        <span class="pre">&gt;</span>
+                        <span class="pre">level</span>
+                        <span class="pre">3</span>
+                        <span class="pre">skill</span>
+                        <span class="pre">groups</span>
+                        <span class="pre">&gt;</span>
+                        <span class="pre">individual</span>
+                        <span class="pre">skill</span></code
+                      >.
+                    </p>
+                    <p>Now you can use your custom taxonomy as:</p>
+                    <div class="highlight-default notranslate">
+                      <div class="highlight">
+                        <pre><span></span><span class="kn">from</span> <span class="nn">ojd_daps_skills.pipeline.extract_skills.extract_skills</span> <span class="kn">import</span> <span class="n">ExtractSkills</span> <span class="c1">#import the module</span>
 
 <span class="n">es</span> <span class="o">=</span> <span class="n">ExtractSkills</span><span class="p">(</span><span class="n">config_name</span><span class="o">=</span><span class="s2">&quot;my_custom_config_name&quot;</span><span class="p">,</span> <span class="n">local</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 
 <span class="n">es</span><span class="o">.</span><span class="n">load</span><span class="p">()</span>
 
-</pre></div>
-</div>
-</div>
-</div>
-</div>
+</pre>
+                      </div>
+                    </div>
+                  </section>
+                </section>
+              </section>
+            </article>
+          </div>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="pipeline_summary.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">Pipeline summary and metrics</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="about.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="pipeline_summary.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
+                  <div class="title">Skills Extractor</div>
                 </div>
-                <div class="title">Pipeline summary and metrics</div>
-              </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-            </a>
-          <a class="prev-page" href="about.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
                 </div>
-                
-                <div class="title">Skills Extractor</div>
-                
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
               </div>
-            </a>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
-          </div>
-          <div class="right-details">
-            <div class="icons">
-              
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#">Custom Usage</a>
+                    <ul>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#configuration-files"
+                          >Configuration files <a name="config_files"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#predefined-configurations"
+                              >Predefined configurations
+                              <a name="predefined_config"></a
+                            ></a>
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#configuration-definitions"
+                              >Configuration definitions
+                              <a name="config_defs"></a
+                            ></a>
+                          </li>
+                        </ul>
+                      </li>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#mapping-to-your-own-taxonomy"
+                          >Mapping to your own taxonomy <a name="mapping"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#format-your-taxonomy"
+                              >Format your taxonomy <a name="format_tax"></a
+                            ></a>
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#define-your-own-configuration-file"
+                              >Define your own configuration file
+                              <a name="custom_config"></a
+                            ></a>
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
             </div>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">Custom Usage</a><ul>
-<li><a class="reference internal" href="#configuration-files-a-name-config-files-a">Configuration files <a name="config_files"></a></a><ul>
-<li><a class="reference internal" href="#predefined-configurations-a-name-predefined-config-a">Predefined configurations <a name="predefined_config"></a></a></li>
-<li><a class="reference internal" href="#configuration-definitions-a-name-config-defs-a">Configuration definitions <a name="config_defs"></a></a></li>
-</ul>
-</li>
-<li><a class="reference internal" href="#mapping-to-your-own-taxonomy-a-name-mapping-a">Mapping to your own taxonomy <a name="mapping"></a></a><ul>
-<li><a class="reference internal" href="#format-your-taxonomy-a-name-format-tax-a">Format your taxonomy <a name="format_tax"></a></a></li>
-<li><a class="reference internal" href="#define-your-own-configuration-file-a-name-custom-config-a">Define your own configuration file <a name="custom_config"></a></a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-
-          </div>
-        </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/extract_skills.html b/docs/build/html/extract_skills.html
index 170f4e04..1f163e59 100644
--- a/docs/build/html/extract_skills.html
+++ b/docs/build/html/extract_skills.html
@@ -1,398 +1,1119 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="The MIT License (MIT)" href="license.html" /><link rel="prev" title="Entity Labelling" href="labelling.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="The MIT License (MIT)" href="license.html" />
+    <link rel="prev" title="Entity Labelling" href="labelling.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>The ExtractSkills class - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>
+      The ExtractSkills class - Skills Extractor v1.0.1 documentation
+    </title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
 
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
-
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="the-extractskills-class">
-<h1>The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class<a class="headerlink" href="#the-extractskills-class" title="Permalink to this heading">#</a></h1>
-<dl class="py class">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">ojd_daps_skills.pipeline.extract_skills.extract_skills.</span></span><span class="sig-name descname"><span class="pre">ExtractSkills</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">config_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'extract_skills_toy'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">local</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">verbose</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">multi_process</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills" title="Permalink to this definition">#</a></dt>
-<dd><p>Class to extract skills from job adverts and map them to a skills taxonomy.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>config_path</strong> (<em>str</em>) – The file name for the config file to be used, defaults to “extract_skills_toy”</p></li>
-<li><p><strong>local</strong> (<em>bool</em>) – Whether you want to load data from local files (True, if not found they will be downloaded from a public source) or via Nesta’s private s3 bucket (False, needs access), defaults to True</p></li>
-<li><p><strong>verbose</strong> (<em>bool</em>) – Whether to limit the number of logging messages (True) or not (False, good for debugging), defaults to True</p></li>
-<li><p><strong>multi_process</strong> (<em>bool</em>) – Whether to use multiprocessing (True) or not (False), defaults to False</p></li>
-</ul>
-</dd>
-</dl>
-</dd></dl>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
+              </div>
 
-<dl class="py method">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load">
-<span class="sig-prename descclassname"><span class="pre">ExtractSkills.</span></span><span class="sig-name descname"><span class="pre">load</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">taxonomy_embedding_file_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">prev_skill_matches_file_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hard_labelled_skills_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hier_name_mapper_file_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Optional</span><span class="p"><span 
class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.load"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load" title="Permalink to this definition">#</a></dt>
-<dd><p>Loads necessary datasets (formatted taxonomy, hard labelled skills, previously matched skills,
-taxonomy embeddings), JobNER skills extraction class and SkillMapper skill mapper class.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>taxonomy_embedding_file_name</strong> (<em>str</em><em>, </em><em>optional</em>) – The relative path to a taxonomy embedding file if it exists. If left unset the embeddings will be generated when the code is run. Defaults to None.</p></li>
-<li><p><strong>prev_skill_matches_file_name</strong> (<em>str</em><em>, </em><em>optional</em>) – The relative path to a previous skill matches file if it exists. Defaults to None.</p></li>
-<li><p><strong>hard_labelled_skills_name</strong> (<em>str</em><em>, </em><em>optional</em>) – The relative path to a hard labelled skills file if it exists. Defaults to None.</p></li>
-<li><p><strong>hier_name_mapper_file_name</strong> (<em>str</em><em>, </em><em>optional</em>) – The relative path to a hierarchy name mapper file if it exists. Defaults to None.</p></li>
-</ul>
-</dd>
-</dl>
-</dd></dl>
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
+            </a>
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
+            </div>
+            <article role="main">
+              <section id="the-extractskills-class">
+                <h1>
+                  The
+                  <code class="docutils literal notranslate"
+                    ><span class="pre">ExtractSkills</span></code
+                  >
+                  class<a
+                    class="headerlink"
+                    href="#the-extractskills-class"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <dl class="py class">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"
+                  >
+                    <em class="property"
+                      ><span class="pre">class</span
+                      ><span class="w"> </span></em
+                    ><span class="sig-prename descclassname"
+                      ><span class="pre"
+                        >ojd_daps_skills.pipeline.extract_skills.extract_skills.</span
+                      ></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">ExtractSkills</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">config_name</span></span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="default_value"
+                        ><span class="pre">'extract_skills_toy'</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"><span class="pre">local</span></span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="default_value"
+                        ><span class="pre">True</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"><span class="pre">verbose</span></span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="default_value"
+                        ><span class="pre">True</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">multi_process</span></span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="default_value"
+                        ><span class="pre">False</span></span
+                      ></em
+                    ><span class="sig-paren">)</span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Class to extract skills from job adverts and map them to a
+                      skills taxonomy.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <ul class="simple">
+                          <li>
+                            <p>
+                              <strong>config_path</strong> (<em>str</em>) – The
+                              file name for the config file to be used, defaults
+                              to “extract_skills_toy”
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>local</strong> (<em>bool</em>) – Whether
+                              you want to load data from local files (True, if
+                              not found they will be downloaded from a public
+                              source) or via Nesta’s private s3 bucket (False,
+                              needs access), defaults to True
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>verbose</strong> (<em>bool</em>) – Whether
+                              to limit the number of logging messages (True) or
+                              not (False, good for debugging), defaults to True
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>multi_process</strong> (<em>bool</em>) –
+                              Whether to use multiprocessing (True) or not
+                              (False), defaults to False
+                            </p>
+                          </li>
+                        </ul>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
 
-<dl class="py method">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills">
-<span class="sig-prename descclassname"><span class="pre">ExtractSkills.</span></span><span class="sig-name descname"><span class="pre">extract_skills</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">job_adverts_skills</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">format_skills</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.extract_skills"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills" title="Permalink to this definition">#</a></dt>
-<dd><p>Extract skills from job adverts using a trained NER model and map them to a taxonomy - combines both get_skills and extract_skills. Experiences will also be extracted, but not mapped to a taxonomy. It can also take as input a list of
-skills and map them to a taxonomy if format_skills is set to True.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><ul class="simple">
-<li><p><strong>job_adverts_skills</strong> (<em>str</em><em> or </em><em>list</em><em> of </em><em>strings</em>) – The text of a job advert, a list of job adverts texts, or a list of skills (if format_skills=True)</p></li>
-<li><p><strong>format_skills</strong> (<em>bool</em>) – If the input is a list of skills (rather than job adverts) then this needs to be set to True in order to format them correctly, default to False.</p></li>
-</ul>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p>A list of dictionaries for each job advert containing the skill and experience entities, and for every skill entity where it maps to in the taxonomy. The output combines both multiskill and skill entities together in the “SKILL” key. Each dictionary is in the format {‘SKILL’: [(skill_entity,(taxonomy_skill_name, taxonomy_skill_id)), …]}, ‘EXPERIENCE’: […]]</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>list of dictionaries for each job advert.</p>
-</dd>
-</dl>
-</dd></dl>
+                <dl class="py method">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"
+                  >
+                    <span class="sig-prename descclassname"
+                      ><span class="pre">ExtractSkills.</span></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">load</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre"
+                          >taxonomy_embedding_file_name</span
+                        ></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Optional</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ><span class="w"> </span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="w"> </span
+                      ><span class="default_value"
+                        ><span class="pre">None</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre"
+                          >prev_skill_matches_file_name</span
+                        ></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Optional</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ><span class="w"> </span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="w"> </span
+                      ><span class="default_value"
+                        ><span class="pre">None</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre"
+                          >hard_labelled_skills_name</span
+                        ></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Optional</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ><span class="w"> </span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="w"> </span
+                      ><span class="default_value"
+                        ><span class="pre">None</span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre"
+                          >hier_name_mapper_file_name</span
+                        ></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Optional</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ><span class="w"> </span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="w"> </span
+                      ><span class="default_value"
+                        ><span class="pre">None</span></span
+                      ></em
+                    ><span class="sig-paren">)</span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.load"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Loads necessary datasets (formatted taxonomy, hard
+                      labelled skills, previously matched skills, taxonomy
+                      embeddings), JobNER skills extraction class and
+                      SkillMapper skill mapper class.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <ul class="simple">
+                          <li>
+                            <p>
+                              <strong>taxonomy_embedding_file_name</strong> (<em
+                                >str</em
+                              ><em>, </em><em>optional</em>) – The relative path
+                              to a taxonomy embedding file if it exists. If left
+                              unset the embeddings will be generated when the
+                              code is run. Defaults to None.
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>prev_skill_matches_file_name</strong> (<em
+                                >str</em
+                              ><em>, </em><em>optional</em>) – The relative path
+                              to a previous skill matches file if it exists.
+                              Defaults to None.
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>hard_labelled_skills_name</strong> (<em
+                                >str</em
+                              ><em>, </em><em>optional</em>) – The relative path
+                              to a hard labelled skills file if it exists.
+                              Defaults to None.
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>hier_name_mapper_file_name</strong> (<em
+                                >str</em
+                              ><em>, </em><em>optional</em>) – The relative path
+                              to a hierarchy name mapper file if it exists.
+                              Defaults to None.
+                            </p>
+                          </li>
+                        </ul>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
 
-<dl class="py method">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills">
-<span class="sig-prename descclassname"><span class="pre">ExtractSkills.</span></span><span class="sig-name descname"><span class="pre">get_skills</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">job_adverts</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.get_skills"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills" title="Permalink to this definition">#</a></dt>
-<dd><p>Predict skill/multiskill/experience entities using the NER model in inputted job adverts.
-Multiskill entities will be split up and converted into individual skill entities where possible.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><p><strong>job_adverts</strong> (<em>str</em><em> or </em><em>list</em><em> of </em><em>strings</em>) – The text of a job advert or a list of job adverts texts</p>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p>A list of entities extracted from each job advert in the form of dictionaries {“SKILL”: [“Microsoft Excel”], “MULTISKILL”: [], “EXPERIENCE”: []}</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>list, the length is equal to the number of job adverts inputted</p>
-</dd>
-</dl>
-</dd></dl>
+                <dl class="py method">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"
+                  >
+                    <span class="sig-prename descclassname"
+                      ><span class="pre">ExtractSkills.</span></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">extract_skills</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">job_adverts_skills</span></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Union</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">,</span></span
+                        ><span class="w"> </span><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ></em
+                    >,
+                    <em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">format_skills</span></span
+                      ><span class="o"><span class="pre">=</span></span
+                      ><span class="default_value"
+                        ><span class="pre">False</span></span
+                      ></em
+                    ><span class="sig-paren">)</span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.extract_skills"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Extract skills from job adverts using a trained NER model
+                      and map them to a taxonomy - combines both get_skills and
+                      map_skills. Experiences will also be extracted, but
+                      not mapped to a taxonomy. It can also take as input a list
+                      of skills and map them to a taxonomy if format_skills is
+                      set to True.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <ul class="simple">
+                          <li>
+                            <p>
+                              <strong>job_adverts_skills</strong> (<em>str</em
+                              ><em> or </em><em>list</em><em> of </em
+                              ><em>strings</em>) – The text of a job advert, a
+                              list of job adverts texts, or a list of skills (if
+                              format_skills=True)
+                            </p>
+                          </li>
+                          <li>
+                            <p>
+                              <strong>format_skills</strong> (<em>bool</em>) –
+                              If the input is a list of skills (rather than job
+                              adverts) then this needs to be set to True in
+                              order to format them correctly, defaults to False.
+                            </p>
+                          </li>
+                        </ul>
+                      </dd>
+                      <dt class="field-even">
+                        Returns<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-even">
+                        <p>
+                          A list of dictionaries for each job advert containing
+                          the skill and experience entities, and for every skill
+                          entity where it maps to in the taxonomy. The output
+                          combines both multiskill and skill entities together
+                          in the “SKILL” key. Each dictionary is in the format
+                          {‘SKILL’: [(skill_entity,(taxonomy_skill_name,
+                          taxonomy_skill_id)), …], ‘EXPERIENCE’: […]}
+                        </p>
+                      </dd>
+                      <dt class="field-odd">
+                        Return type<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>list of dictionaries for each job advert.</p>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
 
-<dl class="py method">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills">
-<span class="sig-prename descclassname"><span class="pre">ExtractSkills.</span></span><span class="sig-name descname"><span class="pre">map_skills</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">predicted_skills</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.map_skills"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills" title="Permalink to this definition">#</a></dt>
-<dd><p>Map skills from job advert(s) to a skills taxonomy. If predicted_skills is a list of skills, it will be formatted accordingly to
-be mapped to a skills taxonomy. All multiskill entities will be mapped in the same way as skill entities are.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><p><strong>predicted_skills</strong> (<em>list</em><em> of </em><em>strings</em><em> or </em><em>a list</em><em> of </em><em>dicts</em>) – A list of skill entities either in the form of a list of strings (assumed to be from the same job advert) or a list of the dictionaries outputted from the get_skills function.</p>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p>A list of dictionaries for each job advert containing the skill and experience entities, and for every skill entity where it maps to in the taxonomy. Multi skill entities are treated as skill entities, and the output combines them together as one. Each dictionary is in the format {‘SKILL’: [(skill_entity,(taxonomy_skill_name, taxonomy_skill_id)), …]}, ‘EXPERIENCE’: […]]</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>list of dictionaries for each job advert.</p>
-</dd>
-</dl>
-</dd></dl>
+                <dl class="py method">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"
+                  >
+                    <span class="sig-prename descclassname"
+                      ><span class="pre">ExtractSkills.</span></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">get_skills</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">job_adverts</span></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Union</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">,</span></span
+                        ><span class="w"> </span><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ></em
+                    ><span class="sig-paren">)</span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.get_skills"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Predict skill/multiskill/experience entities using the NER
+                      model in inputted job adverts. Multiskill entities will be
+                      split up and converted into individual skill entities
+                      where possible.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>
+                          <strong>job_adverts</strong> (<em>str</em><em> or </em
+                          ><em>list</em><em> of </em><em>strings</em>) – The
+                          text of a job advert or a list of job adverts texts
+                        </p>
+                      </dd>
+                      <dt class="field-even">
+                        Returns<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-even">
+                        <p>
+                          A list of entities extracted from each job advert in
+                          the form of dictionaries {“SKILL”: [“Microsoft
+                          Excel”], “MULTISKILL”: [], “EXPERIENCE”: []}
+                        </p>
+                      </dd>
+                      <dt class="field-odd">
+                        Return type<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>
+                          list, the length is equal to the number of job adverts
+                          inputted
+                        </p>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
 
-<dl class="py method">
-<dt class="sig sig-object py" id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills">
-<span class="sig-prename descclassname"><span class="pre">ExtractSkills.</span></span><span class="sig-name descname"><span class="pre">format_skills</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">skills</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">dict</span><span class="p"><span class="pre">]</span></span></span></span><a class="reference internal" href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.format_skills"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills" title="Permalink to this definition">#</a></dt>
-<dd><p>Format list of skills from a single job advert to be in the format needed for mapping to a taxonomy. Also applies the
-multiskill splitting to any skills predicted to be multiskills.</p>
-<dl class="field-list simple">
-<dt class="field-odd">Parameters</dt>
-<dd class="field-odd"><p><strong>skills</strong> (<em>str</em><em> or </em><em>list</em><em> of </em><em>strings</em>) – A list of skills/multiskills from the job advert or a single skill</p>
-</dd>
-<dt class="field-even">Returns</dt>
-<dd class="field-even"><p>The skills arranged into the format [{“SKILL”: […], “MULTISKILL”: […], “EXPERIENCE”: []}]</p>
-</dd>
-<dt class="field-odd">Return type</dt>
-<dd class="field-odd"><p>a list of length 1 containing a dictionary</p>
-</dd>
-</dl>
-</dd></dl>
+                <dl class="py method">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"
+                  >
+                    <span class="sig-prename descclassname"
+                      ><span class="pre">ExtractSkills.</span></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">map_skills</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"
+                        ><span class="pre">predicted_skills</span></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">Union</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">dict</span
+                        ><span class="p"><span class="pre">]</span></span
+                        ><span class="p"><span class="pre">,</span></span
+                        ><span class="w"> </span><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ></em
+                    ><span class="sig-paren">)</span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.map_skills"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Map skills from job advert(s) to a skills taxonomy. If
+                      predicted_skills is a list of skills, it will be formatted
+                      accordingly to be mapped to a skills taxonomy. All
+                      multiskill entities will be mapped in the same way as
+                      skill entities are.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>
+                          <strong>predicted_skills</strong> (<em>list</em
+                          ><em> of </em><em>strings</em><em> or </em
+                          ><em>a list</em><em> of </em><em>dicts</em>) – A list
+                          of skill entities either in the form of a list of
+                          strings (assumed to be from the same job advert) or a
+                          list of the dictionaries outputted from the get_skills
+                          function.
+                        </p>
+                      </dd>
+                      <dt class="field-even">
+                        Returns<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-even">
+                        <p>
+                          A list of dictionaries for each job advert containing
+                          the skill and experience entities, and for every skill
+                          entity where it maps to in the taxonomy. Multi skill
+                          entities are treated as skill entities, and the output
+                          combines them together as one. Each dictionary is in
+                          the format {‘SKILL’:
+                          [(skill_entity,(taxonomy_skill_name,
+                          taxonomy_skill_id)), …], ‘EXPERIENCE’: […]}
+                        </p>
+                      </dd>
+                      <dt class="field-odd">
+                        Return type<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>list of dictionaries for each job advert.</p>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
 
-</div>
+                <dl class="py method">
+                  <dt
+                    class="sig sig-object py"
+                    id="ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"
+                  >
+                    <span class="sig-prename descclassname"
+                      ><span class="pre">ExtractSkills.</span></span
+                    ><span class="sig-name descname"
+                      ><span class="pre">format_skills</span></span
+                    ><span class="sig-paren">(</span
+                    ><em class="sig-param"
+                      ><span class="n"><span class="pre">skills</span></span
+                      ><span class="p"><span class="pre">:</span></span
+                      ><span class="w"> </span
+                      ><span class="n"
+                        ><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">str</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ></em
+                    ><span class="sig-paren">)</span>
+                    <span class="sig-return"
+                      ><span class="sig-return-icon">&#x2192;</span>
+                      <span class="sig-return-typehint"
+                        ><span class="pre">List</span
+                        ><span class="p"><span class="pre">[</span></span
+                        ><span class="pre">dict</span
+                        ><span class="p"><span class="pre">]</span></span></span
+                      ></span
+                    ><a
+                      class="reference internal"
+                      href="_modules/ojd_daps_skills/pipeline/extract_skills/extract_skills.html#ExtractSkills.format_skills"
+                      ><span class="viewcode-link"
+                        ><span class="pre">[source]</span></span
+                      ></a
+                    ><a
+                      class="headerlink"
+                      href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"
+                      title="Permalink to this definition"
+                      >#</a
+                    >
+                  </dt>
+                  <dd>
+                    <p>
+                      Format list of skills from a single job advert to be in
+                      the format needed for mapping to a taxonomy. Also applies
+                      the multiskill splitting to any skills predicted to be
+                      multiskills.
+                    </p>
+                    <dl class="field-list simple">
+                      <dt class="field-odd">
+                        Parameters<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>
+                          <strong>skills</strong> (<em>str</em><em> or </em
+                          ><em>list</em><em> of </em><em>strings</em>) – A list
+                          of skills/multiskills from the job advert or a single
+                          skill
+                        </p>
+                      </dd>
+                      <dt class="field-even">
+                        Returns<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-even">
+                        <p>
+                          The skills arranged into the format [{“SKILL”: […],
+                          “MULTISKILL”: […], “EXPERIENCE”: []}]
+                        </p>
+                      </dd>
+                      <dt class="field-odd">
+                        Return type<span class="colon">:</span>
+                      </dt>
+                      <dd class="field-odd">
+                        <p>a list of length 1 containing a dictionary</p>
+                      </dd>
+                    </dl>
+                  </dd>
+                </dl>
+              </section>
+            </article>
+          </div>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="license.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">The MIT License (MIT)</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="labelling.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="license.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
+                  <div class="title">Entity Labelling</div>
                 </div>
-                <div class="title">The MIT License (MIT)</div>
-              </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-            </a>
-          <a class="prev-page" href="labelling.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
                 </div>
-                
-                <div class="title">Entity Labelling</div>
-                
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
               </div>
-            </a>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
-          </div>
-          <div class="right-details">
-            <div class="icons">
-              
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                    <ul>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"
+                          ><code class="docutils literal notranslate"
+                            ><span class="pre">ExtractSkills</span></code
+                          ></a
+                        >
+                        <ul>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.load()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.extract_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.get_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.map_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.format_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
             </div>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a><ul>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code></a><ul>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.load()</span></code></a></li>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.extract_skills()</span></code></a></li>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.get_skills()</span></code></a></li>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.map_skills()</span></code></a></li>
-<li><a class="reference internal" href="#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.format_skills()</span></code></a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
-
-          </div>
-        </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/index.html b/docs/build/html/index.html
index 57a0c155..5fa0b5cb 100644
--- a/docs/build/html/index.html
+++ b/docs/build/html/index.html
@@ -1,336 +1,791 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Skills Extractor" href="about.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="Skills Extractor" href="about.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
-    </svg>
-  </symbol>
-</svg>
-
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
-
-
 
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="#"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="#">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul>
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="#"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="toctree-wrapper compound">
-<ul>
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="about.html#welcome-to-nesta-s-skills-extractor-library">Welcome to Nesta’s Skills Extractor Library</a></li>
-<li class="toctree-l2"><a class="reference internal" href="about.html#installation-a-name-installation-a">Installation <a name="installation"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="about.html#aws-cli">AWS CLI</a></li>
-</ul>
-</li>
-<li class="toctree-l2"><a class="reference internal" href="about.html#tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a">TL;DR: Using Nesta’s Skills Extractor library <a name="usage"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="about.html#extract-and-map-skills">1. Extract AND map skills</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#extract-skills">2. Extract skills</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#map-skills">3. Map skills</a></li>
-</ul>
-</li>
-<li class="toctree-l2"><a class="reference internal" href="about.html#app">App</a></li>
-<li class="toctree-l2"><a class="reference internal" href="about.html#development-a-name-development-a">Development <a name="development"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="about.html#setup">Setup</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#project-structure">Project structure</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#testing">Testing</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#analysis">Analysis</a></li>
-<li class="toctree-l3"><a class="reference internal" href="about.html#contributor-guidelines">Contributor guidelines</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="custom_usage.html#configuration-files-a-name-config-files-a">Configuration files <a name="config_files"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="custom_usage.html#predefined-configurations-a-name-predefined-config-a">Predefined configurations <a name="predefined_config"></a></a></li>
-<li class="toctree-l3"><a class="reference internal" href="custom_usage.html#configuration-definitions-a-name-config-defs-a">Configuration definitions <a name="config_defs"></a></a></li>
-</ul>
-</li>
-<li class="toctree-l2"><a class="reference internal" href="custom_usage.html#mapping-to-your-own-taxonomy-a-name-mapping-a">Mapping to your own taxonomy <a name="mapping"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="custom_usage.html#format-your-taxonomy-a-name-format-tax-a">Format your taxonomy <a name="format_tax"></a></a></li>
-<li class="toctree-l3"><a class="reference internal" href="custom_usage.html#define-your-own-configuration-file-a-name-custom-config-a">Define your own configuration file <a name="custom_config"></a></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="pipeline_summary.html#intended-use">Intended Use</a></li>
-<li class="toctree-l2"><a class="reference internal" href="pipeline_summary.html#out-of-scope-uses">Out of Scope Uses</a></li>
-<li class="toctree-l2"><a class="reference internal" href="pipeline_summary.html#metrics">Metrics</a><ul>
-<li class="toctree-l3"><a class="reference internal" href="pipeline_summary.html#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation">Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation</a></li>
-<li class="toctree-l3"><a class="reference internal" href="pipeline_summary.html#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills">Comparison 2 - Degree of overlap between Lightcast’s extracted skills and our Lightcast skills</a></li>
-<li class="toctree-l3"><a class="reference internal" href="pipeline_summary.html#evaluation-1-manual-judgement-of-false-positive-rate">Evaluation 1 - Manual judgement of false positive rate</a></li>
-<li class="toctree-l3"><a class="reference internal" href="pipeline_summary.html#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality">Evaluation 2 - Manual judgement of skills extraction and mapping quality</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="model_card.html#model-card-named-entity-recognition-model-a-name-extract-skills-card-a">Model Card: Named Entity Recognition Model <a name="extract_skills_card"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#summary">Summary</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#training">Training</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#ner-metrics">NER Metrics</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#multiskill-metrics">Multiskill Metrics</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#caveats-and-recommendations">Caveats and Recommendations</a></li>
-</ul>
-</li>
-<li class="toctree-l2"><a class="reference internal" href="model_card.html#model-card-skills-to-taxonomy-mapping-a-name-mapping-card-a">Model Card: Skills to Taxonomy Mapping <a name="mapping_card"></a></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#id1">Summary</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#model-factors">Model Factors</a></li>
-<li class="toctree-l3"><a class="reference internal" href="model_card.html#id2">Caveats and Recommendations</a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="labelling.html#training-dataset">Training dataset</a></li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a><ul>
-<li class="toctree-l2"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code></a><ul>
-<li class="toctree-l3"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.load()</span></code></a></li>
-<li class="toctree-l3"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.extract_skills()</span></code></a></li>
-<li class="toctree-l3"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.get_skills()</span></code></a></li>
-<li class="toctree-l3"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.map_skills()</span></code></a></li>
-<li class="toctree-l3"><a class="reference internal" href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"><code class="docutils literal notranslate"><span class="pre">ExtractSkills.format_skills()</span></code></a></li>
-</ul>
-</li>
-</ul>
-</li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-</div>
-
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="about.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
-                </div>
-                <div class="title">Skills Extractor</div>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="#">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
               </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
             </a>
-          
-        </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
           </div>
-          <div class="right-details">
-            <div class="icons">
-              
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label
+                class="toc-overlay-icon toc-content-icon no-toc"
+                for="__toc"
+              >
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
             </div>
+            <article role="main">
+              <div class="toctree-wrapper compound">
+                <ul>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="about.html#welcome-to-nesta-s-skills-extractor-library"
+                          >Welcome to Nesta’s Skills Extractor Library</a
+                        >
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="about.html#installation"
+                          >Installation <a name="installation"></a
+                        ></a>
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="about.html#tl-dr-using-nesta-s-skills-extractor-library"
+                          >TL;DR: Using Nesta’s Skills Extractor library
+                          <a name="usage"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#extract-and-map-skills"
+                              >1. Extract AND map skills</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#extract-skills"
+                              >2. Extract skills</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#map-skills"
+                              >3. Map skills</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                      <li class="toctree-l2">
+                        <a class="reference internal" href="about.html#app"
+                          >App</a
+                        >
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="about.html#development"
+                          >Development <a name="development"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#setup"
+                              >Setup</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#project-structure"
+                              >Project structure</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#testing"
+                              >Testing</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#analysis"
+                              >Analysis</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="about.html#contributor-guidelines"
+                              >Contributor guidelines</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="custom_usage.html#configuration-files"
+                          >Configuration files <a name="config_files"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="custom_usage.html#predefined-configurations"
+                              >Predefined configurations
+                              <a name="predefined_config"></a
+                            ></a>
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="custom_usage.html#configuration-definitions"
+                              >Configuration definitions
+                              <a name="config_defs"></a
+                            ></a>
+                          </li>
+                        </ul>
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="custom_usage.html#mapping-to-your-own-taxonomy"
+                          >Mapping to your own taxonomy <a name="mapping"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="custom_usage.html#format-your-taxonomy"
+                              >Format your taxonomy <a name="format_tax"></a
+                            ></a>
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="custom_usage.html#define-your-own-configuration-file"
+                              >Define your own configuration file
+                              <a name="custom_config"></a
+                            ></a>
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="pipeline_summary.html#intended-use"
+                          >Intended Use</a
+                        >
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="pipeline_summary.html#out-of-scope-uses"
+                          >Out of Scope Uses</a
+                        >
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="pipeline_summary.html#metrics"
+                          >Metrics</a
+                        >
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="pipeline_summary.html#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation"
+                              >Comparison 1 - Top skill groups per occupation
+                              comparison to ESCO essential skill groups per
+                              occupation</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="pipeline_summary.html#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills"
+                              >Comparison 2 - Degree of overlap between
+                              Lightcast’s extracted skills and our Lightcast
+                              skills</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="pipeline_summary.html#evaluation-1-manual-judgement-of-false-positive-rate"
+                              >Evaluation 1 - Manual judgement of false positive
+                              rate</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="pipeline_summary.html#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality"
+                              >Evaluation 2 - Manual judgement of skills
+                              extraction and mapping quality</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="model_card.html#model-card-named-entity-recognition-model"
+                          >Model Card: Named Entity Recognition Model
+                          <a name="extract_skills_card"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#summary"
+                              >Summary</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#training"
+                              >Training</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#ner-metrics"
+                              >NER Metrics</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#multiskill-metrics"
+                              >Multiskill Metrics</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#caveats-and-recommendations"
+                              >Caveats and Recommendations</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="model_card.html#model-card-skills-to-taxonomy-mapping"
+                          >Model Card: Skills to Taxonomy Mapping
+                          <a name="mapping_card"></a
+                        ></a>
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#id1"
+                              >Summary</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#model-factors"
+                              >Model Factors</a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="model_card.html#id2"
+                              >Caveats and Recommendations</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="labelling.html#training-dataset"
+                          >Training dataset</a
+                        >
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                    <ul>
+                      <li class="toctree-l2">
+                        <a
+                          class="reference internal"
+                          href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills"
+                          ><code class="docutils literal notranslate"
+                            ><span class="pre">ExtractSkills</span></code
+                          ></a
+                        >
+                        <ul>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.load()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.extract_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.get_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.map_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                          <li class="toctree-l3">
+                            <a
+                              class="reference internal"
+                              href="extract_skills.html#ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills"
+                              ><code class="docutils literal notranslate"
+                                ><span class="pre"
+                                  >ExtractSkills.format_skills()</span
+                                ></code
+                              ></a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
+            </article>
           </div>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="about.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">Skills Extractor</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
+                </div>
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
+              </div>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
-        
-      </footer>
+        <aside class="toc-drawer no-toc"></aside>
+      </div>
     </div>
-    <aside class="toc-drawer no-toc">
-      
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/labelling.html b/docs/build/html/labelling.html
index babcb631..8f91bebe 100644
--- a/docs/build/html/labelling.html
+++ b/docs/build/html/labelling.html
@@ -1,304 +1,531 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="The ExtractSkills class" href="extract_skills.html" /><link rel="prev" title="Model Cards" href="model_card.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link
+      rel="next"
+      title="The ExtractSkills class"
+      href="extract_skills.html"
+    />
+    <link rel="prev" title="Model Cards" href="model_card.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Entity Labelling - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>Entity Labelling - Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
 
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
-
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="entity-labelling">
-<h1>Entity Labelling<a class="headerlink" href="#entity-labelling" title="Permalink to this heading">#</a></h1>
-<p>To extract skills from job adverts we took an approach of training a named entity recognition (NER) model to predict which parts of job adverts were skills (“skill entities”) and which were experiences (“experience entities”).</p>
-<p>To train the NER model we needed labelled data. First we created a random sample of job adverts and got them into a form needed for labelling using <a class="reference external" href="https://labelstud.io/">Label Studio</a>. More about this labelling process can be found in the <a class="reference external" href="https://nestauk.github.io/ojd_daps_skills/pipeline/skill_ner/README.md"><code class="docutils literal notranslate"><span class="pre">skill_ner</span></code> pipeline</a>.</p>
-<p>There are 3 entity labels in our training data:</p>
-<ol class="arabic simple">
-<li><p><code class="docutils literal notranslate"><span class="pre">SKILL</span></code></p></li>
-<li><p><code class="docutils literal notranslate"><span class="pre">MULTISKILL</span></code></p></li>
-<li><p><code class="docutils literal notranslate"><span class="pre">EXPERIENCE</span></code></p></li>
-</ol>
-<p>The user interface for this labelling task looks like:</p>
-<p><img alt="" src="_images/label_studio.png" /></p>
-<p>We tried our best to label from the start to end of each individual skill, starting at the verb (if given):
-<img alt="" src="_images/label_eg1.jpg" /></p>
-<p>Sometimes it wasn’t easy to label individual skills, for example an earlier part of the sentence might be needed to define the later part. An example of this is “Working in a team and on an individual basis” - we could label “Working in a team” as a single skill, but “on an individual basis” makes no sense without the “Working” word. In these situations we labelled the whole span as multi skills:
-<img alt="" src="_images/label_eg4.jpg" /></p>
-<p>Sometimes there were no entities to label:
-<img alt="" src="_images/label_eg5.jpg" /></p>
-<p><code class="docutils literal notranslate"><span class="pre">EXPERIENCE</span></code> labels will often be followed by the word “experience” e.g. “insurance experience”, and we included some qualifications as experience, e.g. “Electrical qualifications”.</p>
-<div class="section" id="training-dataset">
-<h2>Training dataset<a class="headerlink" href="#training-dataset" title="Permalink to this heading">#</a></h2>
-<p>For the current NER model, 5641 entities in 375 job adverts from our dataset of job adverts were labelled; 354 are multiskill, 4696 are skill, and 608 were experience entities. 20% of the labelled entities were held out as a test set to evaluate the models.</p>
-</div>
-</div>
-
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="extract_skills.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
-                </div>
-                <div class="title">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</div>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
               </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
             </a>
-          <a class="prev-page" href="model_card.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
-                </div>
-                
-                <div class="title">Model Cards</div>
-                
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
               </div>
-            </a>
-        </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
           </div>
-          <div class="right-details">
-            <div class="icons">
-              
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
             </div>
+            <article role="main">
+              <section id="entity-labelling">
+                <h1>
+                  Entity Labelling<a
+                    class="headerlink"
+                    href="#entity-labelling"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <p>
+                  To extract skills from job adverts we took an approach of
+                  training a named entity recognition (NER) model to predict
+                  which parts of job adverts were skills (“skill entities”) and
+                  which were experiences (“experience entities”).
+                </p>
+                <p>
+                  To train the NER model we needed labelled data. First we
+                  created a random sample of job adverts and got them into a
+                  form needed for labelling using
+                  <a class="reference external" href="https://labelstud.io/"
+                    >Label Studio</a
+                  >. More about this labelling process can be found in the
+                  <a
+                    class="reference external"
+                    href="https://nestauk.github.io/ojd_daps_skills/pipeline/skill_ner/README.md"
+                    ><code class="docutils literal notranslate"
+                      ><span class="pre">skill_ner</span></code
+                    >
+                    pipeline</a
+                  >.
+                </p>
+                <p>There are 3 entity labels in our training data:</p>
+                <ol class="arabic simple">
+                  <li>
+                    <p>
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">SKILL</span></code
+                      >
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">MULTISKILL</span></code
+                      >
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">EXPERIENCE</span></code
+                      >
+                    </p>
+                  </li>
+                </ol>
+                <p>The user interface for this labelling task looks like:</p>
+                <p><img alt="" src="_images/label_studio.png" /></p>
+                <p>
+                  We tried our best to label from the start to the end of each
+                  individual skill, starting at the verb (if given):
+                  <img alt="" src="_images/label_eg1.jpg" />
+                </p>
+                <p>
+                  Sometimes it wasn’t easy to label individual skills, for
+                  example an earlier part of the sentence might be needed to
+                  define the later part. An example of this is “Working in a
+                  team and on an individual basis” - we could label “Working in
+                  a team” as a single skill, but “on an individual basis” makes
+                  no sense without the “Working” word. In these situations we
+                  labelled the whole span as multi skills:
+                  <img alt="" src="_images/label_eg4.jpg" />
+                </p>
+                <p>
+                  Sometimes there were no entities to label:
+                  <img alt="" src="_images/label_eg5.jpg" />
+                </p>
+                <p>
+                  <code class="docutils literal notranslate"
+                    ><span class="pre">EXPERIENCE</span></code
+                  >
+                  labels will often be followed by the word “experience” e.g.
+                  “insurance experience”, and we included some qualifications as
+                  experience, e.g. “Electrical qualifications”.
+                </p>
+                <section id="training-dataset">
+                  <h2>
+                    Training dataset<a
+                      class="headerlink"
+                      href="#training-dataset"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    For the current NER model, 5641 entities in 375 job adverts
+                    from our dataset of job adverts were labelled; 354 are
+                    multiskill, 4696 are skill, and 608 are experience
+                    entities. 20% of the labelled entities were held out as a
+                    test set to evaluate the models.
+                  </p>
+                </section>
+              </section>
+            </article>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">Entity Labelling</a><ul>
-<li><a class="reference internal" href="#training-dataset">Training dataset</a></li>
-</ul>
-</li>
-</ul>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="extract_skills.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">
+                    The
+                    <code class="docutils literal notranslate"
+                      ><span class="pre">ExtractSkills</span></code
+                    >
+                    class
+                  </div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="model_card.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-          </div>
+                  <div class="title">Model Cards</div>
+                </div>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
+                </div>
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
+              </div>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
+            </div>
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#">Entity Labelling</a>
+                    <ul>
+                      <li>
+                        <a class="reference internal" href="#training-dataset"
+                          >Training dataset</a
+                        >
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/license.html b/docs/build/html/license.html
index f0e07412..121e36af 100644
--- a/docs/build/html/license.html
+++ b/docs/build/html/license.html
@@ -1,260 +1,434 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="prev" title="The ExtractSkills class" href="extract_skills.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link
+      rel="prev"
+      title="The ExtractSkills class"
+      href="extract_skills.html"
+    />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>The MIT License (MIT) - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>The MIT License (MIT) - Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
-    </svg>
-  </symbol>
-</svg>
-
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
-
-
 
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="the-mit-license-mit">
-<h1>The MIT License (MIT)<a class="headerlink" href="#the-mit-license-mit" title="Permalink to this heading">#</a></h1>
-<p>Copyright (c) 2022, Nesta</p>
-<p>Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:</p>
-<p>The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.</p>
-<p>THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.</p>
-</div>
-
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          
-          <a class="prev-page" href="extract_skills.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
-                </div>
-                
-                <div class="title">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</div>
-                
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
               </div>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
             </a>
-        </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
+              </div>
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
           </div>
-          <div class="right-details">
-            <div class="icons">
-              
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label
+                class="toc-overlay-icon toc-content-icon no-toc"
+                for="__toc"
+              >
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
             </div>
+            <article role="main">
+              <section id="the-mit-license-mit">
+                <h1>
+                  The MIT License (MIT)<a
+                    class="headerlink"
+                    href="#the-mit-license-mit"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <p>Copyright (c) 2022, Nesta</p>
+                <p>
+                  Permission is hereby granted, free of charge, to any person
+                  obtaining a copy of this software and associated documentation
+                  files (the “Software”), to deal in the Software without
+                  restriction, including without limitation the rights to use,
+                  copy, modify, merge, publish, distribute, sublicense, and/or
+                  sell copies of the Software, and to permit persons to whom the
+                  Software is furnished to do so, subject to the following
+                  conditions:
+                </p>
+                <p>
+                  The above copyright notice and this permission notice shall be
+                  included in all copies or substantial portions of the
+                  Software.
+                </p>
+                <p>
+                  THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY
+                  KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+                  WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+                  PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+                  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+                  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+                  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+                  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+                </p>
+              </section>
+            </article>
           </div>
+          <footer>
+            <div class="related-pages">
+              <a class="prev-page" href="extract_skills.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
+
+                  <div class="title">
+                    The
+                    <code class="docutils literal notranslate"
+                      ><span class="pre">ExtractSkills</span></code
+                    >
+                    class
+                  </div>
+                </div>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
+                </div>
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
+              </div>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
-        
-      </footer>
+        <aside class="toc-drawer no-toc"></aside>
+      </div>
     </div>
-    <aside class="toc-drawer no-toc">
-      
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/model_card.html b/docs/build/html/model_card.html
index 0c9371ca..8f4c1b79 100644
--- a/docs/build/html/model_card.html
+++ b/docs/build/html/model_card.html
@@ -1,414 +1,930 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Entity Labelling" href="labelling.html" /><link rel="prev" title="Pipeline summary and metrics" href="pipeline_summary.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="Entity Labelling" href="labelling.html" />
+    <link
+      rel="prev"
+      title="Pipeline summary and metrics"
+      href="pipeline_summary.html"
+    />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Model Cards - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>Model Cards - Skills Extractor v1.0.1 documentation</title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
 
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1"><a class="reference internal" href="pipeline_summary.html">Pipeline summary and metrics</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
-
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="model-cards">
-<h1>Model Cards<a class="headerlink" href="#model-cards" title="Permalink to this heading">#</a></h1>
-<p>This page contains information for different parts of the skills extraction and mapping pipeline. We detail the two main parts of the pipeline; the extract skills pipeline and the skills to taxonomy mapping pipeline.</p>
-<p>Developed by data scientists in Nesta’s Data Analytics Practice, (last updated on 23-11-2022).</p>
-<ul class="simple">
-<li><p><span class="xref myst">Model Card: Extract Skills</span></p></li>
-<li><p><span class="xref myst">Model Card: Skills to Taxonomy Mapping</span></p></li>
-</ul>
-<p><img alt="" src="_images/overview_example.png" />
-<em>An example of extracting skills and mapping them to the ESCO taxonomy.</em></p>
-<div class="section" id="model-card-named-entity-recognition-model-a-name-extract-skills-card-a">
-<h2>Model Card: Named Entity Recognition Model <a name="extract_skills_card"></a><a class="headerlink" href="#model-card-named-entity-recognition-model-a-name-extract-skills-card-a" title="Permalink to this heading">#</a></h2>
-<p><img alt="" src="_images/predict_flow.png" />
-<em>The extracting skills pipeline.</em></p>
-<div class="section" id="summary">
-<h3>Summary<a class="headerlink" href="#summary" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Train a Named Entity Recognition (NER) spaCy component to extract skills, multiskills and experience entities from job adverts.</p></li>
-<li><p>Predict whether or not a skill is multi-skill or not using scikit learn’s SVM model. Features are length of entity; if ‘and’ in entity; if ‘,’ in entity.</p></li>
-<li><p>Split multiskills, where possible, based on semantic rules.</p></li>
-</ul>
-</div>
-<div class="section" id="training">
-<h3>Training<a class="headerlink" href="#training" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>For the NER model, 375 job adverts were labelled for skills, multiskills and experience.</p></li>
-<li><p>As of 15th November 2022, <strong>5641</strong> entities in 375 job adverts from OJO were labelled;</p></li>
-<li><p><strong>354</strong> are multiskill, <strong>4696</strong> are skill, and <strong>608</strong> were experience entities. 20% of the labelled entities were held out as a test set to evaluate the models.</p></li>
-</ul>
-<p>The NER model we trained used <a class="reference external" href="https://spacy.io/">spaCy’s</a> NER neural network architecture. Their NER architecture <em>“features a sophisticated word embedding strategy using subword features and ‘Bloom’ embeddings, a deep convolutional neural network with residual connections, and a novel transition-based approach to named entity parsing”</em> - more about this <a class="reference external" href="https://spacy.io/universe/project/video-spacys-ner-model">here</a>.</p>
-<p>You can read more about the creation of the labelling data <a class="reference internal" href="labelling.html"><span class="doc std std-doc">here</span></a>.</p>
-</div>
-<div class="section" id="ner-metrics">
-<h3>NER Metrics<a class="headerlink" href="#ner-metrics" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>A metric in the python library nerevaluate (<a class="reference external" href="https://pypi.org/project/nervaluate/">read more here</a>) was used to calculate F1, precision and recall for the NER and SVM classifier on the held-out test set. As of 15th November 2022, the results are as follows:</p></li>
-</ul>
-<div class="table-wrapper colwidths-auto docutils container">
-<table class="colwidths-auto docutils align-default">
-<thead>
-<tr class="row-odd"><th class="head"><p>Entity</p></th>
-<th class="head"><p>F1</p></th>
-<th class="head"><p>Precision</p></th>
-<th class="head"><p>Recall</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>Skill</p></td>
-<td><p>0.586</p></td>
-<td><p>0.679</p></td>
-<td><p>0.515</p></td>
-</tr>
-<tr class="row-odd"><td><p>Experience</p></td>
-<td><p>0.506</p></td>
-<td><p>0.648</p></td>
-<td><p>0.416</p></td>
-</tr>
-<tr class="row-even"><td><p>All</p></td>
-<td><p>0.563</p></td>
-<td><p>0.643</p></td>
-<td><p>0.500</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-<ul class="simple">
-<li><p>These metrics use partial entity matching.</p></li>
-<li><p>More details of the evaluation performance across both the NER model and the SVM model can be found in <code class="docutils literal notranslate"><span class="pre">outputs/models/ner_model/20220825/train_details.json</span></code></p></li>
-</ul>
-</div>
-<div class="section" id="multiskill-metrics">
-<h3>Multiskill Metrics<a class="headerlink" href="#multiskill-metrics" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>The same training data and held out test set used for the NER model was used to evaluate the SVM model. On a held out test set, the SVM model achieved 91% accuracy.</p></li>
-<li><p>When evaluating the multiskill splitter algorithm rules, 253 multiskill spans were labelled as ‘good’, ‘ok’ or ‘bad’ splits. Of the 253 multiskill spans, 80 were split. Of the splits, 66% were ‘good’, 9% were ‘ok’ and 25% were ‘bad’.</p></li>
-<li><p>More details of the evaluation performance across both the NER model and the SVM model can be found in <code class="docutils literal notranslate"><span class="pre">outputs/models/ner_model/20220825/train_details.json</span></code></p></li>
-</ul>
-</div>
-<div class="section" id="caveats-and-recommendations">
-<h3>Caveats and Recommendations<a class="headerlink" href="#caveats-and-recommendations" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>As we take a rules based approach to splitting multiskills, many multiskills do not get split. If a multiskill is unable to be split, we still match to a taxonomy of choice. Future work should add more rules to split multiskills.</p></li>
-<li><p>We deduplicate the extracted skills in the output. This means that if a job advert mentions ‘excel skills’ twice and these entities are extracted, the output will just contain “excel skills” once. However, if the string is slightly different, e.g. “excel skills” and “Excel skill”, both occurrences will be outputted.</p></li>
-<li><p>Future work could look to train embeddings with job-specific texts, disambiguate acronyms and improve NER model performance.</p></li>
-</ul>
-</div>
-</div>
-<div class="section" id="model-card-skills-to-taxonomy-mapping-a-name-mapping-card-a">
-<h2>Model Card: Skills to Taxonomy Mapping <a name="mapping_card"></a><a class="headerlink" href="#model-card-skills-to-taxonomy-mapping-a-name-mapping-card-a" title="Permalink to this heading">#</a></h2>
-<p><img alt="" src="_images/match_flow.png" />
-<em>The methodology for matching skills to the ESCO taxonomy - threshold numbers can be changed in the config file.</em></p>
-<div class="section" id="id1">
-<h3>Summary<a class="headerlink" href="#id1" title="Permalink to this heading">#</a></h3>
-<ul class="simple">
-<li><p>Match to a taxonomy based on different similarity thresholds.</p></li>
-<li><p>First try to match at the most granular level of a taxonomy based on cosine similarity between embedded, extracted skill and taxonomy skills. Extracted and taxonomy skills are embedded using huggingface’s <a class="reference external" href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a> model.</p></li>
-<li><p>If there is no close granular skill above 0.7 cosine similarity (this threshold can be changed in configuration file), we then assign the skill to different levels of the taxonomy in one of two approaches (maximum share and maximum similarity - see diagram above for details).</p></li>
-<li><p>If matching to ESCO, 43 commonly occurring skills from a sample of 100,000 job adverts are hard coded.</p></li>
-</ul>
-</div>
-<div class="section" id="model-factors">
-<h3>Model Factors<a class="headerlink" href="#model-factors" title="Permalink to this heading">#</a></h3>
-<p>The main factors in this matching approach are: 1) the different thresholds at different levels of a taxonomy and 2) the different matching approaches.</p>
-</div>
-<div class="section" id="id2">
-<h3>Caveats and Recommendations<a class="headerlink" href="#id2" title="Permalink to this heading">#</a></h3>
-<p>This step does less well when:</p>
-<ul class="simple">
-<li><p>The extracted skill is a metaphor: i.e. ‘understand the bigger picture’ gets matched to ‘take pictures’</p></li>
-<li><p>The extracted skill is an acronym: i.e. ‘drafting ORSAs’ gets matched to ‘fine arts’</p></li>
-<li><p>The extracted skill is not a skill (poor NER model performance): i.e. ‘assist with the’ gets matched to providing general assistance to people</p></li>
-</ul>
-<p>We recommend that:</p>
-<ul class="simple">
-<li><p>Skill entities might match to the same taxonomy skill; the output does not deduplicate matched skills. If deduplicating is important, you will need to deduplicate at the taxonomy level.</p></li>
-<li><p>The current predefined configurations ensures that every extracted skill will be matched to a taxonomy. However, if a skill is matched to the highest skill group, we label it as ‘unmatched’. Under this definition, for ESCO we identify approximately 2% of skills as ‘unmatched’.</p></li>
-<li><p>The configuration file contains the relevant thresholds for matching per taxonomy. These thresholds will need to be manually tuned based on different taxonomies.</p></li>
-</ul>
-</div>
-</div>
-</div>
-
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="labelling.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
-                </div>
-                <div class="title">Entity Labelling</div>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
               </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
             </a>
-          <a class="prev-page" href="pipeline_summary.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
-                </div>
-                
-                <div class="title">Pipeline summary and metrics</div>
-                
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="pipeline_summary.html"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
               </div>
-            </a>
-        </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
           </div>
-          <div class="right-details">
-            <div class="icons">
-              
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
             </div>
+            <article role="main">
+              <section id="model-cards">
+                <h1>
+                  Model Cards<a
+                    class="headerlink"
+                    href="#model-cards"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <p>
+                  This page contains information for different parts of the
+                  skills extraction and mapping pipeline. We detail the two main
+                  parts of the pipeline: the extract skills pipeline and the
+                  skills to taxonomy mapping pipeline.
+                </p>
+                <p>
+                  Developed by data scientists in Nesta’s Data Analytics
+                  Practice (last updated on 23-11-2022).
+                </p>
+                <ul class="simple">
+                  <li>
+                    <p>
+                      <a class="reference internal" href="#extract_skills_card"
+                        ><span class="xref myst"
+                          >Model Card: Extract Skills</span
+                        ></a
+                      >
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      <a class="reference internal" href="#mapping_card"
+                        ><span class="xref myst"
+                          >Model Card: Skills to Taxonomy Mapping</span
+                        ></a
+                      >
+                    </p>
+                  </li>
+                </ul>
+                <p>
+                  <img alt="" src="_images/overview_example.png" />
+                  <em
+                    >An example of extracting skills and mapping them to the
+                    ESCO taxonomy.</em
+                  >
+                </p>
+                <section id="model-card-named-entity-recognition-model">
+                  <h2>
+                    Model Card: Named Entity Recognition Model
+                    <a name="extract_skills_card"></a
+                    ><a
+                      class="headerlink"
+                      href="#model-card-named-entity-recognition-model"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    <img alt="" src="_images/predict_flow.png" />
+                    <em>The extracting skills pipeline.</em>
+                  </p>
+                  <section id="summary">
+                    <h3>
+                      Summary<a
+                        class="headerlink"
+                        href="#summary"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Train a Named Entity Recognition (NER) spaCy component
+                          to extract skills, multiskills and experience entities
+                          from job adverts.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Predict whether or not a skill is a multiskill using
+                          scikit-learn’s SVM model. Features are length of
+                          entity; if ‘and’ in entity; if ‘,’ in entity.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Split multiskills, where possible, based on semantic
+                          rules.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section id="training">
+                    <h3>
+                      Training<a
+                        class="headerlink"
+                        href="#training"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          For the NER model, 375 job adverts were labelled for
+                          skills, multiskills and experience.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          As of 15th November 2022,
+                          <strong>5641</strong> entities in 375 job adverts from
+                          OJO were labelled;
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          <strong>354</strong> are multiskill,
+                          <strong>4696</strong> are skill, and
+                          <strong>608</strong> are experience entities. 20% of
+                          the labelled entities were held out as a test set to
+                          evaluate the models.
+                        </p>
+                      </li>
+                    </ul>
+                    <p>
+                      The NER model we trained used
+                      <a class="reference external" href="https://spacy.io/"
+                        >spaCy’s</a
+                      >
+                      NER neural network architecture. Their NER architecture
+                      <em
+                        >“features a sophisticated word embedding strategy using
+                        subword features and ‘Bloom’ embeddings, a deep
+                        convolutional neural network with residual connections,
+                        and a novel transition-based approach to named entity
+                        parsing”</em
+                      >
+                      - more about this
+                      <a
+                        class="reference external"
+                        href="https://spacy.io/universe/project/video-spacys-ner-model"
+                        >here</a
+                      >.
+                    </p>
+                    <p>
+                      You can read more about the creation of the labelling data
+                      <a class="reference internal" href="labelling.html"
+                        ><span class="std std-doc">here</span></a
+                      >.
+                    </p>
+                  </section>
+                  <section id="ner-metrics">
+                    <h3>
+                      NER Metrics<a
+                        class="headerlink"
+                        href="#ner-metrics"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          A metric in the Python library nervaluate (<a
+                            class="reference external"
+                            href="https://pypi.org/project/nervaluate/"
+                            >read more here</a
+                          >) was used to calculate F1, precision and recall for
+                          the NER and SVM classifier on the held-out test set.
+                          As of 15th November 2022, the results are as follows:
+                        </p>
+                      </li>
+                    </ul>
+                    <div
+                      class="table-wrapper colwidths-auto docutils container"
+                    >
+                      <table class="docutils align-default">
+                        <thead>
+                          <tr class="row-odd">
+                            <th class="head"><p>Entity</p></th>
+                            <th class="head"><p>F1</p></th>
+                            <th class="head"><p>Precision</p></th>
+                            <th class="head"><p>Recall</p></th>
+                          </tr>
+                        </thead>
+                        <tbody>
+                          <tr class="row-even">
+                            <td><p>Skill</p></td>
+                            <td><p>0.586</p></td>
+                            <td><p>0.679</p></td>
+                            <td><p>0.515</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Experience</p></td>
+                            <td><p>0.506</p></td>
+                            <td><p>0.648</p></td>
+                            <td><p>0.416</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>All</p></td>
+                            <td><p>0.563</p></td>
+                            <td><p>0.643</p></td>
+                            <td><p>0.500</p></td>
+                          </tr>
+                        </tbody>
+                      </table>
+                    </div>
+                    <ul class="simple">
+                      <li><p>These metrics use partial entity matching.</p></li>
+                      <li>
+                        <p>
+                          More details of the evaluation performance across both
+                          the NER model and the SVM model can be found in
+                          <code class="docutils literal notranslate"
+                            ><span class="pre"
+                              >outputs/models/ner_model/20220825/train_details.json</span
+                            ></code
+                          >
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section id="multiskill-metrics">
+                    <h3>
+                      Multiskill Metrics<a
+                        class="headerlink"
+                        href="#multiskill-metrics"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          The same training data and held out test set used for
+                          the NER model were used to evaluate the SVM model. On a
+                          held out test set, the SVM model achieved 91%
+                          accuracy.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          When evaluating the multiskill splitter algorithm
+                          rules, 253 multiskill spans were labelled as ‘good’,
+                          ‘ok’ or ‘bad’ splits. Of the 253 multiskill spans, 80
+                          were split. Of the splits, 66% were ‘good’, 9% were
+                          ‘ok’ and 25% were ‘bad’.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          More details of the evaluation performance across both
+                          the NER model and the SVM model can be found in
+                          <code class="docutils literal notranslate"
+                            ><span class="pre"
+                              >outputs/models/ner_model/20220825/train_details.json</span
+                            ></code
+                          >
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section id="caveats-and-recommendations">
+                    <h3>
+                      Caveats and Recommendations<a
+                        class="headerlink"
+                        href="#caveats-and-recommendations"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          As we take a rules based approach to splitting
+                          multiskills, many multiskills do not get split. If a
+                          multiskill is unable to be split, we still match to a
+                          taxonomy of choice. Future work should add more rules
+                          to split multiskills.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          We deduplicate the extracted skills in the output.
+                          This means that if a job advert mentions ‘excel
+                          skills’ twice and these entities are extracted, the
+                          output will just contain “excel skills” once. However,
+                          if the string is slightly different, e.g. “excel
+                          skills” and “Excel skill”, both occurrences will be
+                          outputted.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Future work could look to train embeddings with
+                          job-specific texts, disambiguate acronyms and improve
+                          NER model performance.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                </section>
+                <section id="model-card-skills-to-taxonomy-mapping">
+                  <h2>
+                    Model Card: Skills to Taxonomy Mapping
+                    <a name="mapping_card"></a
+                    ><a
+                      class="headerlink"
+                      href="#model-card-skills-to-taxonomy-mapping"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    <img alt="" src="_images/match_flow.png" />
+                    <em
+                      >The methodology for matching skills to the ESCO taxonomy
+                      - threshold numbers can be changed in the config file.</em
+                    >
+                  </p>
+                  <section id="id1">
+                    <h3>
+                      Summary<a
+                        class="headerlink"
+                        href="#id1"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Match to a taxonomy based on different similarity
+                          thresholds.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          First try to match at the most granular level of a
+                          taxonomy based on cosine similarity between embedded,
+                          extracted skill and taxonomy skills. Extracted and
+                          taxonomy skills are embedded using huggingface’s
+                          <a
+                            class="reference external"
+                            href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
+                            >sentence-transformers/all-MiniLM-L6-v2</a
+                          >
+                          model.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          If there is no close granular skill above 0.7 cosine
+                          similarity (this threshold can be changed in the
+                          configuration file), we then assign the skill to
+                          different levels of the taxonomy in one of two
+                          approaches (maximum share and maximum similarity - see
+                          diagram above for details).
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          If matching to ESCO, 43 commonly occurring skills from
+                          a sample of 100,000 job adverts are hard coded.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section id="model-factors">
+                    <h3>
+                      Model Factors<a
+                        class="headerlink"
+                        href="#model-factors"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      The main factors in this matching approach are: 1) the
+                      different thresholds at different levels of a taxonomy and
+                      2) the different matching approaches.
+                    </p>
+                  </section>
+                  <section id="id2">
+                    <h3>
+                      Caveats and Recommendations<a
+                        class="headerlink"
+                        href="#id2"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>This step does less well when:</p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          The extracted skill is a metaphor: e.g. ‘understand
+                          the bigger picture’ gets matched to ‘take pictures’
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The extracted skill is an acronym: e.g. ‘drafting
+                          ORSAs’ gets matched to ‘fine arts’
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The extracted skill is not a skill (poor NER model
+                          performance): e.g. ‘assist with the’ gets matched to
+                          ‘providing general assistance to people’
+                        </p>
+                      </li>
+                    </ul>
+                    <p>We recommend that:</p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Skill entities might match to the same taxonomy skill;
+                          the output does not deduplicate matched skills. If
+                          deduplicating is important, you will need to
+                          deduplicate at the taxonomy level.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The current predefined configurations ensure that
+                          every extracted skill will be matched to a taxonomy.
+                          However, if a skill is matched to the highest skill
+                          group, we label it as ‘unmatched’. Under this
+                          definition, for ESCO we identify approximately 2% of
+                          skills as ‘unmatched’.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The configuration file contains the relevant
+                          thresholds for matching per taxonomy. These thresholds
+                          will need to be manually tuned based on different
+                          taxonomies.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                </section>
+              </section>
+            </article>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">Model Cards</a><ul>
-<li><a class="reference internal" href="#model-card-named-entity-recognition-model-a-name-extract-skills-card-a">Model Card: Named Entity Recognition Model <a name="extract_skills_card"></a></a><ul>
-<li><a class="reference internal" href="#summary">Summary</a></li>
-<li><a class="reference internal" href="#training">Training</a></li>
-<li><a class="reference internal" href="#ner-metrics">NER Metrics</a></li>
-<li><a class="reference internal" href="#multiskill-metrics">Multiskill Metrics</a></li>
-<li><a class="reference internal" href="#caveats-and-recommendations">Caveats and Recommendations</a></li>
-</ul>
-</li>
-<li><a class="reference internal" href="#model-card-skills-to-taxonomy-mapping-a-name-mapping-card-a">Model Card: Skills to Taxonomy Mapping <a name="mapping_card"></a></a><ul>
-<li><a class="reference internal" href="#id1">Summary</a></li>
-<li><a class="reference internal" href="#model-factors">Model Factors</a></li>
-<li><a class="reference internal" href="#id2">Caveats and Recommendations</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="labelling.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">Entity Labelling</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="pipeline_summary.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-          </div>
+                  <div class="title">Pipeline summary and metrics</div>
+                </div>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
+                </div>
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
+              </div>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
+            </div>
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#">Model Cards</a>
+                    <ul>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#model-card-named-entity-recognition-model"
+                          >Model Card: Named Entity Recognition Model
+                          <a name="extract_skills_card"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a class="reference internal" href="#summary"
+                              >Summary</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#training"
+                              >Training</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#ner-metrics"
+                              >NER Metrics</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#multiskill-metrics"
+                              >Multiskill Metrics</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#caveats-and-recommendations"
+                              >Caveats and Recommendations</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                      <li>
+                        <a
+                          class="reference internal"
+                          href="#model-card-skills-to-taxonomy-mapping"
+                          >Model Card: Skills to Taxonomy Mapping
+                          <a name="mapping_card"></a
+                        ></a>
+                        <ul>
+                          <li>
+                            <a class="reference internal" href="#id1"
+                              >Summary</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#model-factors"
+                              >Model Factors</a
+                            >
+                          </li>
+                          <li>
+                            <a class="reference internal" href="#id2"
+                              >Caveats and Recommendations</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/pipeline_summary.html b/docs/build/html/pipeline_summary.html
index 72e13ff3..9aa57ed1 100644
--- a/docs/build/html/pipeline_summary.html
+++ b/docs/build/html/pipeline_summary.html
@@ -1,451 +1,935 @@
-<!doctype html>
+<!DOCTYPE html>
 <html class="no-js" lang="en">
-  <head><meta charset="utf-8"/>
-    <meta name="viewport" content="width=device-width,initial-scale=1"/>
-    <meta name="color-scheme" content="light dark"><link rel="author" title="About these documents" href="about.html" /><link rel="index" title="Index" href="genindex.html" /><link rel="search" title="Search" href="search.html" /><link rel="next" title="Model Cards" href="model_card.html" /><link rel="prev" title="Custom Usage" href="custom_usage.html" />
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width,initial-scale=1" />
+    <meta name="color-scheme" content="light dark" />
+    <meta
+      name="generator"
+      content="Docutils 0.18.1: http://docutils.sourceforge.net/"
+    />
+    <link rel="author" title="About these documents" href="about.html" />
+    <link rel="index" title="Index" href="genindex.html" />
+    <link rel="search" title="Search" href="search.html" />
+    <link rel="next" title="Model Cards" href="model_card.html" />
+    <link rel="prev" title="Custom Usage" href="custom_usage.html" />
 
-    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29"/>
-        <title>Pipeline summary and metrics - Skills Extractor v1.0.1 documentation</title>
-      <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c" />
-    <link rel="stylesheet" type="text/css" href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e" />
-    
-    
+    <meta name="generator" content="sphinx-5.3.0, furo 2022.09.29" />
+    <title>
+      Pipeline summary and metrics - Skills Extractor v1.0.1 documentation
+    </title>
+    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo.css?digest=d81277517bee4d6b0349d71bb2661d4890b5617c"
+    />
+    <link
+      rel="stylesheet"
+      type="text/css"
+      href="_static/styles/furo-extensions.css?digest=30d1aed668e5c3a91c3e3bf6a60b675221979f0e"
+    />
 
-
-<style>
-  body {
-    --color-code-background: #f8f8f8;
-  --color-code-foreground: black;
-  
-  }
-  @media not print {
-    body[data-theme="dark"] {
-      --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
-    }
-    @media (prefers-color-scheme: dark) {
-      body:not([data-theme="light"]) {
-        --color-code-background: #202020;
-  --color-code-foreground: #d0d0d0;
-  
+    <style>
+      body {
+        --color-code-background: #f8f8f8;
+        --color-code-foreground: black;
+      }
+      @media not print {
+        body[data-theme="dark"] {
+          --color-code-background: #202020;
+          --color-code-foreground: #d0d0d0;
+        }
+        @media (prefers-color-scheme: dark) {
+          body:not([data-theme="light"]) {
+            --color-code-background: #202020;
+            --color-code-foreground: #d0d0d0;
+          }
+        }
       }
-    }
-  }
-</style></head>
+    </style>
+  </head>
   <body>
-    
     <script>
       document.body.dataset.theme = localStorage.getItem("theme") || "auto";
     </script>
-    
 
-<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
-  <symbol id="svg-toc" viewBox="0 0 24 24">
-    <title>Contents</title>
-    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
-      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
-    </svg>
-  </symbol>
-  <symbol id="svg-menu" viewBox="0 0 24 24">
-    <title>Menu</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
-      <line x1="3" y1="12" x2="21" y2="12"></line>
-      <line x1="3" y1="6" x2="21" y2="6"></line>
-      <line x1="3" y1="18" x2="21" y2="18"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
-    <title>Expand</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
-      <polyline points="9 18 15 12 9 6"></polyline>
-    </svg>
-  </symbol>
-  <symbol id="svg-sun" viewBox="0 0 24 24">
-    <title>Light mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
-      <circle cx="12" cy="12" r="5"></circle>
-      <line x1="12" y1="1" x2="12" y2="3"></line>
-      <line x1="12" y1="21" x2="12" y2="23"></line>
-      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
-      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
-      <line x1="1" y1="12" x2="3" y2="12"></line>
-      <line x1="21" y1="12" x2="23" y2="12"></line>
-      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
-      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
-    </svg>
-  </symbol>
-  <symbol id="svg-moon" viewBox="0 0 24 24">
-    <title>Dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
-      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
-    </svg>
-  </symbol>
-  <symbol id="svg-sun-half" viewBox="0 0 24 24">
-    <title>Auto light/dark mode</title>
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
-      stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-shadow">
-      <path stroke="none" d="M0 0h24v24H0z" fill="none"/>
-      <circle cx="12" cy="12" r="9" />
-      <path d="M13 12h5" />
-      <path d="M13 15h4" />
-      <path d="M13 18h1" />
-      <path d="M13 9h4" />
-      <path d="M13 6h1" />
+    <svg xmlns="http://www.w3.org/2000/svg" style="display: none">
+      <symbol id="svg-toc" viewBox="0 0 24 24">
+        <title>Contents</title>
+        <svg
+          stroke="currentColor"
+          fill="currentColor"
+          stroke-width="0"
+          viewBox="0 0 1024 1024"
+        >
+          <path
+            d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-menu" viewBox="0 0 24 24">
+        <title>Menu</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-menu"
+        >
+          <line x1="3" y1="12" x2="21" y2="12"></line>
+          <line x1="3" y1="6" x2="21" y2="6"></line>
+          <line x1="3" y1="18" x2="21" y2="18"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-arrow-right" viewBox="0 0 24 24">
+        <title>Expand</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-chevron-right"
+        >
+          <polyline points="9 18 15 12 9 6"></polyline>
+        </svg>
+      </symbol>
+      <symbol id="svg-sun" viewBox="0 0 24 24">
+        <title>Light mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="feather-sun"
+        >
+          <circle cx="12" cy="12" r="5"></circle>
+          <line x1="12" y1="1" x2="12" y2="3"></line>
+          <line x1="12" y1="21" x2="12" y2="23"></line>
+          <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
+          <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
+          <line x1="1" y1="12" x2="3" y2="12"></line>
+          <line x1="21" y1="12" x2="23" y2="12"></line>
+          <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
+          <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
+        </svg>
+      </symbol>
+      <symbol id="svg-moon" viewBox="0 0 24 24">
+        <title>Dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-moon"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <path
+            d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z"
+          />
+        </svg>
+      </symbol>
+      <symbol id="svg-sun-half" viewBox="0 0 24 24">
+        <title>Auto light/dark mode</title>
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="1.5"
+          stroke-linecap="round"
+          stroke-linejoin="round"
+          class="icon-tabler-shadow"
+        >
+          <path stroke="none" d="M0 0h24v24H0z" fill="none" />
+          <circle cx="12" cy="12" r="9" />
+          <path d="M13 12h5" />
+          <path d="M13 15h4" />
+          <path d="M13 18h1" />
+          <path d="M13 9h4" />
+          <path d="M13 6h1" />
+        </svg>
+      </symbol>
     </svg>
-  </symbol>
-</svg>
 
-<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation">
-<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc">
-<label class="overlay sidebar-overlay" for="__navigation">
-  <div class="visually-hidden">Hide navigation sidebar</div>
-</label>
-<label class="overlay toc-overlay" for="__toc">
-  <div class="visually-hidden">Hide table of contents sidebar</div>
-</label>
+    <input
+      type="checkbox"
+      class="sidebar-toggle"
+      name="__navigation"
+      id="__navigation"
+    />
+    <input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" />
+    <label class="overlay sidebar-overlay" for="__navigation">
+      <div class="visually-hidden">Hide navigation sidebar</div>
+    </label>
+    <label class="overlay toc-overlay" for="__toc">
+      <div class="visually-hidden">Hide table of contents sidebar</div>
+    </label>
 
-
-
-<div class="page">
-  <header class="mobile-header">
-    <div class="header-left">
-      <label class="nav-overlay-icon" for="__navigation">
-        <div class="visually-hidden">Toggle site navigation sidebar</div>
-        <i class="icon"><svg><use href="#svg-menu"></use></svg></i>
-      </label>
-    </div>
-    <div class="header-center">
-      <a href="index.html"><div class="brand">Skills Extractor v1.0.1 documentation</div></a>
-    </div>
-    <div class="header-right">
-      <div class="theme-toggle-container theme-toggle-header">
-        <button class="theme-toggle">
-          <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-          <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-          <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-          <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
-        </button>
-      </div>
-      <label class="toc-overlay-icon toc-header-icon" for="__toc">
-        <div class="visually-hidden">Toggle table of contents sidebar</div>
-        <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
-      </label>
-    </div>
-  </header>
-  <aside class="sidebar-drawer">
-    <div class="sidebar-container">
-      
-      <div class="sidebar-sticky"><a class="sidebar-brand centered" href="index.html">
-  
-  <div class="sidebar-logo-container">
-    <img class="sidebar-logo" src="_static/nesta_escoe_transparent.png" alt="Logo"/>
-  </div>
-  
-  <span class="sidebar-brand-text">Skills Extractor v1.0.1 documentation</span>
-  
-</a><form class="sidebar-search-container" method="get" action="search.html" role="search">
-  <input class="sidebar-search" placeholder=Search name="q" aria-label="Search">
-  <input type="hidden" name="check_keywords" value="yes">
-  <input type="hidden" name="area" value="default">
-</form>
-<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
-  <ul class="current">
-<li class="toctree-l1"><a class="reference internal" href="about.html">Skills Extractor</a></li>
-<li class="toctree-l1"><a class="reference internal" href="custom_usage.html">Custom Usage</a></li>
-<li class="toctree-l1 current current-page"><a class="current reference internal" href="#">Pipeline summary and metrics</a></li>
-<li class="toctree-l1"><a class="reference internal" href="model_card.html">Model Cards</a></li>
-<li class="toctree-l1"><a class="reference internal" href="labelling.html">Entity Labelling</a></li>
-<li class="toctree-l1"><a class="reference internal" href="extract_skills.html">The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class</a></li>
-<li class="toctree-l1"><a class="reference internal" href="license.html">The MIT License (MIT)</a></li>
-</ul>
-
-</div>
-</div>
-
-      </div>
-      
-    </div>
-  </aside>
-  <div class="main">
-    <div class="content">
-      <div class="article-container">
-        <a href="#" class="back-to-top muted-link">
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
-            <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
-          </svg>
-          <span>Back to top</span>
-        </a>
-        <div class="content-icon-container">
-          
-<div class="theme-toggle-container theme-toggle-content">
+    <div class="page">
+      <header class="mobile-header">
+        <div class="header-left">
+          <label class="nav-overlay-icon" for="__navigation">
+            <div class="visually-hidden">Toggle site navigation sidebar</div>
+            <i class="icon"
+              ><svg><use href="#svg-menu"></use></svg
+            ></i>
+          </label>
+        </div>
+        <div class="header-center">
+          <a href="index.html"
+            ><div class="brand">Skills Extractor v1.0.1 documentation</div></a
+          >
+        </div>
+        <div class="header-right">
+          <div class="theme-toggle-container theme-toggle-header">
             <button class="theme-toggle">
-              <div class="visually-hidden">Toggle Light / Dark / Auto color theme</div>
-              <svg class="theme-icon-when-auto"><use href="#svg-sun-half"></use></svg>
-              <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
-              <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
+              <div class="visually-hidden">
+                Toggle Light / Dark / Auto color theme
+              </div>
+              <svg class="theme-icon-when-auto">
+                <use href="#svg-sun-half"></use>
+              </svg>
+              <svg class="theme-icon-when-dark">
+                <use href="#svg-moon"></use>
+              </svg>
+              <svg class="theme-icon-when-light">
+                <use href="#svg-sun"></use>
+              </svg>
             </button>
           </div>
-          <label class="toc-overlay-icon toc-content-icon" for="__toc">
+          <label class="toc-overlay-icon toc-header-icon" for="__toc">
             <div class="visually-hidden">Toggle table of contents sidebar</div>
-            <i class="icon"><svg><use href="#svg-toc"></use></svg></i>
+            <i class="icon"
+              ><svg><use href="#svg-toc"></use></svg
+            ></i>
           </label>
         </div>
-        <article role="main">
-          <div class="section" id="pipeline-summary-and-metrics">
-<h1>Pipeline summary and metrics<a class="headerlink" href="#pipeline-summary-and-metrics" title="Permalink to this heading">#</a></h1>
-<p><img alt="" src="_images/overview.png" /></p>
-<p>High level, the overall pipeline includes:</p>
-<ul class="simple">
-<li><p>Named Entity Recognition (NER) model to extract skill, multi skill or experience entities in job adverts;</p></li>
-<li><p>Support Vector Machine (SVM) model to predict whether the skill entity is a skill or multiskill; if multiskill, apply rules to split multiskills into individual skill entities;</p></li>
-<li><p>Embed all entities (skill and multi skill entities) and taxonomy skills using huggingface’s <a class="reference external" href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a> pre-trained model;</p></li>
-<li><p>Map extracted skills (skill and multi skill) onto taxonomy skills using cosine similarity of embeddings.</p></li>
-</ul>
-<p>For further information or feedback please contact Liz Gallagher, India Kerle or Cath Sleeman.</p>
-<div class="section" id="intended-use">
-<h2>Intended Use<a class="headerlink" href="#intended-use" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Extract skills from online job adverts and match extracted skills to a user’s skill taxonomy of choice, such as the European Commission’s European Skills, Competences, and Occupations (ESCO) or Lightcast’s Open Skills.</p></li>
-<li><p>Intended users include researchers in labour statistics or related government bodies.</p></li>
-</ul>
-</div>
-<div class="section" id="out-of-scope-uses">
-<h2>Out of Scope Uses<a class="headerlink" href="#out-of-scope-uses" title="Permalink to this heading">#</a></h2>
-<ul class="simple">
-<li><p>Out of scope is extracting and matching skills from job adverts in non-English languages; extracting and matching skills from texts other than job adverts; drawing conclusions on new, unidentified skills.</p></li>
-<li><p>Skills extracted should not be used to determine skill demand without expert steer and input nor should be used for any discriminatory hiring practices.</p></li>
-</ul>
-</div>
-<div class="section" id="metrics">
-<h2>Metrics<a class="headerlink" href="#metrics" title="Permalink to this heading">#</a></h2>
-<p>There is no exact way to evaluate how well our pipeline works; however we have several proxies to better understand how our approach compares.</p>
-<div class="section" id="comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation">
-<h3>Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation<a class="headerlink" href="#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation" title="Permalink to this heading">#</a></h3>
-<p>The ESCO dataset also includes information on the essential skills per occupation. We compare ESCO’s essential skill groups per occupation with the top ESCO-mapped skill groups per occupation. We identify top skills per occupation by:</p>
-<ul class="simple">
-<li><p>Identifying occupations for which we have at least 100 job adverts;</p></li>
-<li><p>Identify skills extracted at ONLY the skill level;</p></li>
-<li><p>Identify a top skill threshold by calculating the 75 percentile % of skills counts for a given occupation</p></li>
-<li><p>Identify the % of top ESCO-mapped skill groups in ESCO’s essential skill groups per occupation</p></li>
-</ul>
-<p>At a high level, we find that:</p>
-<ul class="simple">
-<li><p>58 occupations with 100 or more job adverts were found in both ESCO and a sample of deduplicated 100,000 job adverts</p></li>
-<li><p>The average # of adverts per occupation is 345.54</p></li>
-<li><p>We extract essential ESCO skills, transversal skills and additional skills</p></li>
-<li><p>On average, 94.5 percent of essential ESCO skill groups were also in the top skill groups extracted per occupation</p></li>
-<li><p>The median percent of essential ESCO skills per occupation that were extracted from our algorithm is 97.84.</p></li>
-</ul>
-</div>
-<div class="section" id="comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills">
-<h3>Comparison 2 - Degree of overlap between Lightcast’s extracted skills and our Lightcast skills<a class="headerlink" href="#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills" title="Permalink to this heading">#</a></h3>
-<p>We compare extracted Lightcast skills from Lightcasts’ Open Skills algorithm and our current approach from 99 job adverts, with a minimum cosine similarity threshold between an extracted skill and taxonomy skill set to 0 to guarantee we only match at the skill level</p>
-<p>We found:</p>
-<ul class="simple">
-<li><p>We extract an average of 10.22 skills per job advert while Lightcast’s Open Skills algorithm extracts an average of 6.42 skills per job advert</p></li>
-<li><p>There no overlap for 40% of job adverts between the two approaches</p></li>
-<li><p>Of the job adverts where there is overlap, on average, 39.3% of extracted Lightcast skills are present in our current approach. The median percentage is 33.3%.</p></li>
-<li><p>Qualitatively, there are a number of limitations to the degree of overlap approach for comparison:</p></li>
-<li><p>The two skill lists may contain very similar skills i.e. Financial Accounting vs. Finance but will be considered different as a result</p></li>
-<li><p>For exact comparison, we set the cosine similarity threshold to 0 to guarantee extracted skill-level skills but would otherwise not do so. This allows for inappropriate skill matches i.e. ‘Eye Examination’ for a supply chain role</p></li>
-<li><p>Lightcast’s algorithm may not be a single source of truth and it also extracts inappropriate skill matches i.e. ‘Flooring’ for a care assistant role</p></li>
-</ul>
-</div>
-<div class="section" id="evaluation-1-manual-judgement-of-false-positive-rate">
-<h3>Evaluation 1 - Manual judgement of false positive rate<a class="headerlink" href="#evaluation-1-manual-judgement-of-false-positive-rate" title="Permalink to this heading">#</a></h3>
-<p>We looked at the ESCO-mapped skills extracted from a random sample of 64 job adverts, and manually judged how many skills shouldn’t have been extracted from the job advert i.e. the false positives. We also performed this analysis when looking at the skills extracted from 22 job adverts using Lightcast’s Skills Extractor API.</p>
-<ul class="simple">
-<li><p>Our results showed on average 27% of the skills extracted from a job advert are false positives.</p></li>
-<li><p>For Lightcast, on average 12% of the skills extracted from a job advert are false positives.</p></li>
-</ul>
-</div>
-<div class="section" id="evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality">
-<h3>Evaluation 2 - Manual judgement of skills extraction and mapping quality<a class="headerlink" href="#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality" title="Permalink to this heading">#</a></h3>
-<p>We manually tagged a random sample of skills extracted from job adverts, with whether we thought they were inappropriate, OK or excellent skill entities, and whether we thought they had inappropriate, OK or excellent matches to ESCO skills.</p>
-<ul class="simple">
-<li><p>We felt that out of 183 skill entities 73% were excellent entities, 19% were OK and 8% were inappropriate.</p></li>
-<li><p>172 out of 183 skill entities were matched to ESCO skills.</p></li>
-<li><p>Of the 172 matched skill entities we felt 53% were excellently matched, 30% were OK and 17% were inappropriate.</p></li>
-</ul>
-<div class="table-wrapper colwidths-auto docutils container">
-<table class="colwidths-auto docutils align-default">
-<thead>
-<tr class="row-odd"><th class="head"><p>Skill entity quality</p></th>
-<th class="head"><p>ESCO match quality</p></th>
-<th class="head"><p>count</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>Inappropriate</p></td>
-<td><p>Inappropriate</p></td>
-<td><p>9</p></td>
-</tr>
-<tr class="row-odd"><td><p>Inappropriate</p></td>
-<td><p>OK</p></td>
-<td><p>1</p></td>
-</tr>
-<tr class="row-even"><td><p>OK</p></td>
-<td><p>Inappropriate</p></td>
-<td><p>9</p></td>
-</tr>
-<tr class="row-odd"><td><p>OK</p></td>
-<td><p>OK</p></td>
-<td><p>16</p></td>
-</tr>
-<tr class="row-even"><td><p>OK</p></td>
-<td><p>Excellent</p></td>
-<td><p>7</p></td>
-</tr>
-<tr class="row-odd"><td><p>Excellent</p></td>
-<td><p>Inappropriate</p></td>
-<td><p>11</p></td>
-</tr>
-<tr class="row-even"><td><p>Excellent</p></td>
-<td><p>OK</p></td>
-<td><p>35</p></td>
-</tr>
-<tr class="row-odd"><td><p>Excellent</p></td>
-<td><p>Excellent</p></td>
-<td><p>83</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-<ul class="simple">
-<li><p>87% of the matches were to either an individual skill or the lowest level of the skills taxonomy (level 3).</p></li>
-<li><p>The match quality is at its best when the skill entity is matched to an individual ESCO skill.</p></li>
-</ul>
-<div class="table-wrapper colwidths-auto docutils container">
-<table class="colwidths-auto docutils align-default">
-<thead>
-<tr class="row-odd"><th class="head"><p>Taxonomy level mapped to</p></th>
-<th class="head"><p>Number in sample</p></th>
-<th class="head"><p>Average match quality score (0-inappropriate, 1-OK, 2-excellent)</p></th>
-</tr>
-</thead>
-<tbody>
-<tr class="row-even"><td><p>Skill</p></td>
-<td><p>99</p></td>
-<td><p>1.71</p></td>
-</tr>
-<tr class="row-odd"><td><p>Skill hierarchy level 3</p></td>
-<td><p>51</p></td>
-<td><p>0.90</p></td>
-</tr>
-<tr class="row-even"><td><p>Attitudes hierarchy</p></td>
-<td><p>8</p></td>
-<td><p>1.63</p></td>
-</tr>
-<tr class="row-odd"><td><p>Skill hierarchy level 2</p></td>
-<td><p>6</p></td>
-<td><p>0.33</p></td>
-</tr>
-<tr class="row-even"><td><p>Knoweldge hierarchy</p></td>
-<td><p>6</p></td>
-<td><p>0.17</p></td>
-</tr>
-<tr class="row-odd"><td><p>Transversal hierarchy</p></td>
-<td><p>1</p></td>
-<td><p>1.00</p></td>
-</tr>
-</tbody>
-</table>
-</div>
-</div>
-</div>
-</div>
-
-        </article>
-      </div>
-      <footer>
-        
-        <div class="related-pages">
-          <a class="next-page" href="model_card.html">
-              <div class="page-info">
-                <div class="context">
-                  <span>Next</span>
-                </div>
-                <div class="title">Model Cards</div>
+      </header>
+      <aside class="sidebar-drawer">
+        <div class="sidebar-container">
+          <div class="sidebar-sticky">
+            <a class="sidebar-brand centered" href="index.html">
+              <div class="sidebar-logo-container">
+                <img
+                  class="sidebar-logo"
+                  src="_static/nesta_escoe_transparent.png"
+                  alt="Logo"
+                />
               </div>
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
+
+              <span class="sidebar-brand-text"
+                >Skills Extractor v1.0.1 documentation</span
+              >
             </a>
-          <a class="prev-page" href="custom_usage.html">
-              <svg class="furo-related-icon"><use href="#svg-arrow-right"></use></svg>
-              <div class="page-info">
-                <div class="context">
-                  <span>Previous</span>
-                </div>
-                
-                <div class="title">Custom Usage</div>
-                
+            <form
+              class="sidebar-search-container"
+              method="get"
+              action="search.html"
+              role="search"
+            >
+              <input
+                class="sidebar-search"
+                placeholder="Search"
+                name="q"
+                aria-label="Search"
+              />
+              <input type="hidden" name="check_keywords" value="yes" />
+              <input type="hidden" name="area" value="default" />
+            </form>
+            <div id="searchbox"></div>
+            <div class="sidebar-scroll">
+              <div class="sidebar-tree">
+                <ul class="current">
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="about.html"
+                      >Skills Extractor</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="custom_usage.html"
+                      >Custom Usage</a
+                    >
+                  </li>
+                  <li class="toctree-l1 current current-page">
+                    <a class="current reference internal" href="#"
+                      >Pipeline summary and metrics</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="model_card.html"
+                      >Model Cards</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="labelling.html"
+                      >Entity Labelling</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="extract_skills.html"
+                      >The
+                      <code class="docutils literal notranslate"
+                        ><span class="pre">ExtractSkills</span></code
+                      >
+                      class</a
+                    >
+                  </li>
+                  <li class="toctree-l1">
+                    <a class="reference internal" href="license.html"
+                      >The MIT License (MIT)</a
+                    >
+                  </li>
+                </ul>
               </div>
-            </a>
-        </div>
-        <div class="bottom-of-page">
-          <div class="left-details">
-            <div class="copyright">
-                Copyright &#169; 2022, Liz Gallagher, India Kerle
             </div>
-            Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a>'s
-            
-            <a href="https://github.com/pradyunsg/furo">Furo</a>
-            
           </div>
-          <div class="right-details">
-            <div class="icons">
-              
+        </div>
+      </aside>
+      <div class="main">
+        <div class="content">
+          <div class="article-container">
+            <a href="#" class="back-to-top muted-link">
+              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+                <path
+                  d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"
+                ></path>
+              </svg>
+              <span>Back to top</span>
+            </a>
+            <div class="content-icon-container">
+              <div class="theme-toggle-container theme-toggle-content">
+                <button class="theme-toggle">
+                  <div class="visually-hidden">
+                    Toggle Light / Dark / Auto color theme
+                  </div>
+                  <svg class="theme-icon-when-auto">
+                    <use href="#svg-sun-half"></use>
+                  </svg>
+                  <svg class="theme-icon-when-dark">
+                    <use href="#svg-moon"></use>
+                  </svg>
+                  <svg class="theme-icon-when-light">
+                    <use href="#svg-sun"></use>
+                  </svg>
+                </button>
+              </div>
+              <label class="toc-overlay-icon toc-content-icon" for="__toc">
+                <div class="visually-hidden">
+                  Toggle table of contents sidebar
+                </div>
+                <i class="icon"
+                  ><svg><use href="#svg-toc"></use></svg
+                ></i>
+              </label>
             </div>
+            <article role="main">
+              <section id="pipeline-summary-and-metrics">
+                <h1>
+                  Pipeline summary and metrics<a
+                    class="headerlink"
+                    href="#pipeline-summary-and-metrics"
+                    title="Permalink to this heading"
+                    >#</a
+                  >
+                </h1>
+                <p><img alt="" src="_images/overview.png" /></p>
+                <p>High level, the overall pipeline includes:</p>
+                <ul class="simple">
+                  <li>
+                    <p>
+                      Named Entity Recognition (NER) model to extract skill,
+                      multi skill or experience entities in job adverts;
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      Support Vector Machine (SVM) model to predict whether the
+                      skill entity is a skill or multiskill; if multiskill,
+                      apply rules to split multiskills into individual skill
+                      entities;
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      Embed all entities (skill and multi skill entities) and
+                      taxonomy skills using huggingface’s
+                      <a
+                        class="reference external"
+                        href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
+                        >sentence-transformers/all-MiniLM-L6-v2</a
+                      >
+                      pre-trained model;
+                    </p>
+                  </li>
+                  <li>
+                    <p>
+                      Map extracted skills (skill and multi skill) onto taxonomy
+                      skills using cosine similarity of embeddings.
+                    </p>
+                  </li>
+                </ul>
+                <p>
+                  For further information or feedback please contact Liz
+                  Gallagher, India Kerle or Cath Sleeman.
+                </p>
+                <section id="intended-use">
+                  <h2>
+                    Intended Use<a
+                      class="headerlink"
+                      href="#intended-use"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <ul class="simple">
+                    <li>
+                      <p>
+                        Extract skills from online job adverts and match
+                        extracted skills to a user’s skill taxonomy of choice,
+                        such as the European Commission’s European Skills,
+                        Competences, and Occupations (ESCO) or Lightcast’s Open
+                        Skills.
+                      </p>
+                    </li>
+                    <li>
+                      <p>
+                        Intended users include researchers in labour statistics
+                        or related government bodies.
+                      </p>
+                    </li>
+                  </ul>
+                </section>
+                <section id="out-of-scope-uses">
+                  <h2>
+                    Out of Scope Uses<a
+                      class="headerlink"
+                      href="#out-of-scope-uses"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <ul class="simple">
+                    <li>
+                      <p>
+                        Out of scope is extracting and matching skills from job
+                        adverts in non-English languages; extracting and
+                        matching skills from texts other than job adverts;
+                        drawing conclusions on new, unidentified skills.
+                      </p>
+                    </li>
+                    <li>
+                      <p>
+                        Skills extracted should not be used to determine skill
+                        demand without expert steer and input nor should be used
+                        for any discriminatory hiring practices.
+                      </p>
+                    </li>
+                  </ul>
+                </section>
+                <section id="metrics">
+                  <h2>
+                    Metrics<a
+                      class="headerlink"
+                      href="#metrics"
+                      title="Permalink to this heading"
+                      >#</a
+                    >
+                  </h2>
+                  <p>
+                    There is no exact way to evaluate how well our pipeline
+                    works; however we have several proxies to better understand
+                    how our approach compares.
+                  </p>
+                  <section
+                    id="comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation"
+                  >
+                    <h3>
+                      Comparison 1 - Top skill groups per occupation comparison
+                      to ESCO essential skill groups per occupation<a
+                        class="headerlink"
+                        href="#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      The ESCO dataset also includes information on the
+                      essential skills per occupation. We compare ESCO’s
+                      essential skill groups per occupation with the top
+                      ESCO-mapped skill groups per occupation. We identify top
+                      skills per occupation by:
+                    </p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Identifying occupations for which we have at least 100
+                          job adverts;
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Identify skills extracted at ONLY the skill level;
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Identify a top skill threshold by calculating the
+                          75th percentile of skill counts for a given occupation
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Identify the % of top ESCO-mapped skill groups in
+                          ESCO’s essential skill groups per occupation
+                        </p>
+                      </li>
+                    </ul>
+                    <p>At a high level, we find that:</p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          58 occupations with 100 or more job adverts were found
+                          in both ESCO and a sample of deduplicated 100,000 job
+                          adverts
+                        </p>
+                      </li>
+                      <li>
+                        <p>The average # of adverts per occupation is 345.54</p>
+                      </li>
+                      <li>
+                        <p>
+                          We extract essential ESCO skills, transversal skills
+                          and additional skills
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          On average, 94.5 percent of essential ESCO skill
+                          groups were also in the top skill groups extracted per
+                          occupation
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The median percent of essential ESCO skills per
+                          occupation that were extracted from our algorithm is
+                          97.84.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section
+                    id="comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills"
+                  >
+                    <h3>
+                      Comparison 2 - Degree of overlap between Lightcast’s
+                      extracted skills and our Lightcast skills<a
+                        class="headerlink"
+                        href="#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      We compare extracted Lightcast skills from Lightcast’s
+                      Open Skills algorithm and our current approach from 99 job
+                      adverts, with a minimum cosine similarity threshold
+                      between an extracted skill and taxonomy skill set to 0 to
+                      guarantee we only match at the skill level
+                    </p>
+                    <p>We found:</p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          We extract an average of 10.22 skills per job advert
+                          while Lightcast’s Open Skills algorithm extracts an
+                          average of 6.42 skills per job advert
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          There is no overlap for 40% of job adverts between
+                          the two approaches
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Of the job adverts where there is overlap, on average,
+                          39.3% of extracted Lightcast skills are present in our
+                          current approach. The median percentage is 33.3%.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Qualitatively, there are a number of limitations to
+                          the degree of overlap approach for comparison:
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The two skill lists may contain very similar skills
+                          e.g. Financial Accounting vs. Finance but will be
+                          considered different as a result
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          For exact comparison, we set the cosine similarity
+                          threshold to 0 to guarantee extracted skill-level
+                          skills but would otherwise not do so. This allows for
+                          inappropriate skill matches e.g. ‘Eye Examination’ for
+                          a supply chain role
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Lightcast’s algorithm may not be a single source of
+                          truth and it also extracts inappropriate skill matches
+                          e.g. ‘Flooring’ for a care assistant role
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section
+                    id="evaluation-1-manual-judgement-of-false-positive-rate"
+                  >
+                    <h3>
+                      Evaluation 1 - Manual judgement of false positive rate<a
+                        class="headerlink"
+                        href="#evaluation-1-manual-judgement-of-false-positive-rate"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      We looked at the ESCO-mapped skills extracted from a
+                      random sample of 64 job adverts, and manually judged how
+                      many skills shouldn’t have been extracted from the job
+                      advert i.e. the false positives. We also performed this
+                      analysis when looking at the skills extracted from 22 job
+                      adverts using Lightcast’s Skills Extractor API.
+                    </p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          Our results showed on average 27% of the skills
+                          extracted from a job advert are false positives.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          For Lightcast, on average 12% of the skills extracted
+                          from a job advert are false positives.
+                        </p>
+                      </li>
+                    </ul>
+                  </section>
+                  <section
+                    id="evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality"
+                  >
+                    <h3>
+                      Evaluation 2 - Manual judgement of skills extraction and
+                      mapping quality<a
+                        class="headerlink"
+                        href="#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality"
+                        title="Permalink to this heading"
+                        >#</a
+                      >
+                    </h3>
+                    <p>
+                      We manually tagged a random sample of skills extracted
+                      from job adverts, with whether we thought they were
+                      inappropriate, OK or excellent skill entities, and whether
+                      we thought they had inappropriate, OK or excellent matches
+                      to ESCO skills.
+                    </p>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          We felt that out of 183 skill entities 73% were
+                          excellent entities, 19% were OK and 8% were
+                          inappropriate.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          172 out of 183 skill entities were matched to ESCO
+                          skills.
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          Of the 172 matched skill entities we felt 53% were
+                          excellently matched, 30% were OK and 17% were
+                          inappropriate.
+                        </p>
+                      </li>
+                    </ul>
+                    <div
+                      class="table-wrapper colwidths-auto docutils container"
+                    >
+                      <table class="docutils align-default">
+                        <thead>
+                          <tr class="row-odd">
+                            <th class="head"><p>Skill entity quality</p></th>
+                            <th class="head"><p>ESCO match quality</p></th>
+                            <th class="head"><p>count</p></th>
+                          </tr>
+                        </thead>
+                        <tbody>
+                          <tr class="row-even">
+                            <td><p>Inappropriate</p></td>
+                            <td><p>Inappropriate</p></td>
+                            <td><p>9</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Inappropriate</p></td>
+                            <td><p>OK</p></td>
+                            <td><p>1</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>OK</p></td>
+                            <td><p>Inappropriate</p></td>
+                            <td><p>9</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>OK</p></td>
+                            <td><p>OK</p></td>
+                            <td><p>16</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>OK</p></td>
+                            <td><p>Excellent</p></td>
+                            <td><p>7</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Excellent</p></td>
+                            <td><p>Inappropriate</p></td>
+                            <td><p>11</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>Excellent</p></td>
+                            <td><p>OK</p></td>
+                            <td><p>35</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Excellent</p></td>
+                            <td><p>Excellent</p></td>
+                            <td><p>83</p></td>
+                          </tr>
+                        </tbody>
+                      </table>
+                    </div>
+                    <ul class="simple">
+                      <li>
+                        <p>
+                          87% of the matches were to either an individual skill
+                          or the lowest level of the skills taxonomy (level 3).
+                        </p>
+                      </li>
+                      <li>
+                        <p>
+                          The match quality is at its best when the skill entity
+                          is matched to an individual ESCO skill.
+                        </p>
+                      </li>
+                    </ul>
+                    <div
+                      class="table-wrapper colwidths-auto docutils container"
+                    >
+                      <table class="docutils align-default">
+                        <thead>
+                          <tr class="row-odd">
+                            <th class="head">
+                              <p>Taxonomy level mapped to</p>
+                            </th>
+                            <th class="head"><p>Number in sample</p></th>
+                            <th class="head">
+                              <p>
+                                Average match quality score (0-inappropriate,
+                                1-OK, 2-excellent)
+                              </p>
+                            </th>
+                          </tr>
+                        </thead>
+                        <tbody>
+                          <tr class="row-even">
+                            <td><p>Skill</p></td>
+                            <td><p>99</p></td>
+                            <td><p>1.71</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Skill hierarchy level 3</p></td>
+                            <td><p>51</p></td>
+                            <td><p>0.90</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>Attitudes hierarchy</p></td>
+                            <td><p>8</p></td>
+                            <td><p>1.63</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Skill hierarchy level 2</p></td>
+                            <td><p>6</p></td>
+                            <td><p>0.33</p></td>
+                          </tr>
+                          <tr class="row-even">
+                            <td><p>Knowledge hierarchy</p></td>
+                            <td><p>6</p></td>
+                            <td><p>0.17</p></td>
+                          </tr>
+                          <tr class="row-odd">
+                            <td><p>Transversal hierarchy</p></td>
+                            <td><p>1</p></td>
+                            <td><p>1.00</p></td>
+                          </tr>
+                        </tbody>
+                      </table>
+                    </div>
+                  </section>
+                </section>
+              </section>
+            </article>
           </div>
-        </div>
-        
-      </footer>
-    </div>
-    <aside class="toc-drawer">
-      
-      
-      <div class="toc-sticky toc-scroll">
-        <div class="toc-title-container">
-          <span class="toc-title">
-            On this page
-          </span>
-        </div>
-        <div class="toc-tree-container">
-          <div class="toc-tree">
-            <ul>
-<li><a class="reference internal" href="#">Pipeline summary and metrics</a><ul>
-<li><a class="reference internal" href="#intended-use">Intended Use</a></li>
-<li><a class="reference internal" href="#out-of-scope-uses">Out of Scope Uses</a></li>
-<li><a class="reference internal" href="#metrics">Metrics</a><ul>
-<li><a class="reference internal" href="#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation">Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation</a></li>
-<li><a class="reference internal" href="#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills">Comparison 2 - Degree of overlap between Lightcast’s extracted skills and our Lightcast skills</a></li>
-<li><a class="reference internal" href="#evaluation-1-manual-judgement-of-false-positive-rate">Evaluation 1 - Manual judgement of false positive rate</a></li>
-<li><a class="reference internal" href="#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality">Evaluation 2 - Manual judgement of skills extraction and mapping quality</a></li>
-</ul>
-</li>
-</ul>
-</li>
-</ul>
+          <footer>
+            <div class="related-pages">
+              <a class="next-page" href="model_card.html">
+                <div class="page-info">
+                  <div class="context">
+                    <span>Next</span>
+                  </div>
+                  <div class="title">Model Cards</div>
+                </div>
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+              </a>
+              <a class="prev-page" href="custom_usage.html">
+                <svg class="furo-related-icon">
+                  <use href="#svg-arrow-right"></use>
+                </svg>
+                <div class="page-info">
+                  <div class="context">
+                    <span>Previous</span>
+                  </div>
 
-          </div>
+                  <div class="title">Custom Usage</div>
+                </div>
+              </a>
+            </div>
+            <div class="bottom-of-page">
+              <div class="left-details">
+                <div class="copyright">
+                  Copyright &#169; 2022, Liz Gallagher, India Kerle
+                </div>
+                Made with <a href="https://www.sphinx-doc.org/">Sphinx</a> and
+                <a class="muted-link" href="https://pradyunsg.me">@pradyunsg</a
+                >'s
+
+                <a href="https://github.com/pradyunsg/furo">Furo</a>
+              </div>
+              <div class="right-details">
+                <div class="icons"></div>
+              </div>
+            </div>
+          </footer>
         </div>
+        <aside class="toc-drawer">
+          <div class="toc-sticky toc-scroll">
+            <div class="toc-title-container">
+              <span class="toc-title"> On this page </span>
+            </div>
+            <div class="toc-tree-container">
+              <div class="toc-tree">
+                <ul>
+                  <li>
+                    <a class="reference internal" href="#"
+                      >Pipeline summary and metrics</a
+                    >
+                    <ul>
+                      <li>
+                        <a class="reference internal" href="#intended-use"
+                          >Intended Use</a
+                        >
+                      </li>
+                      <li>
+                        <a class="reference internal" href="#out-of-scope-uses"
+                          >Out of Scope Uses</a
+                        >
+                      </li>
+                      <li>
+                        <a class="reference internal" href="#metrics"
+                          >Metrics</a
+                        >
+                        <ul>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation"
+                              >Comparison 1 - Top skill groups per occupation
+                              comparison to ESCO essential skill groups per
+                              occupation</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills"
+                              >Comparison 2 - Degree of overlap between
+                              Lightcast’s extracted skills and our Lightcast
+                              skills</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#evaluation-1-manual-judgement-of-false-positive-rate"
+                              >Evaluation 1 - Manual judgement of false positive
+                              rate</a
+                            >
+                          </li>
+                          <li>
+                            <a
+                              class="reference internal"
+                              href="#evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality"
+                              >Evaluation 2 - Manual judgement of skills
+                              extraction and mapping quality</a
+                            >
+                          </li>
+                        </ul>
+                      </li>
+                    </ul>
+                  </li>
+                </ul>
+              </div>
+            </div>
+          </div>
+        </aside>
       </div>
-      
-      
-    </aside>
-  </div>
-</div><script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
+    </div>
+    <script
+      data-url_root="./"
+      id="documentation_options"
+      src="_static/documentation_options.js"
+    ></script>
     <script src="_static/jquery.js"></script>
     <script src="_static/underscore.js"></script>
     <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
     <script src="_static/doctools.js"></script>
     <script src="_static/sphinx_highlight.js"></script>
     <script src="_static/scripts/furo.js"></script>
-    </body>
-</html>
\ No newline at end of file
+  </body>
+</html>
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index c69ac25d..b38bbfa4 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1,1121 @@
-Search.setIndex({"docnames": ["about", "index"], "filenames": ["about.md", "index.rst"], "titles": ["Skills Extractor", "&lt;no title&gt;"], "terms": {"document": 0, "thi": 0, "page": 0, "contain": 0, "inform": 0, "how": 0, "The": [0, 1], "allow": 0, "you": 0, "phrase": 0, "from": 0, "job": 0, "advertis": 0, "text": 0, "them": 0, "onto": 0, "taxonomi": [0, 1], "your": [0, 1], "choic": 0, "we": 0, "current": 0, "support": 0, "three": 0, "differ": 0, "european": 0, "commiss": 0, "compet": 0, "occup": [0, 1], "esco": [0, 1], "lightcast": [0, 1], "open": 0, "toi": 0, "intern": 0, "purpos": 0, "If": 0, "d": 0, "like": 0, "learn": 0, "more": 0, "about": 0, "model": [0, 1], "pleas": 0, "refer": 0, "card": [0, 1], "mai": 0, "also": 0, "want": 0, "read": 0, "wider": 0, "our": [0, 1], "introduct": 0, "blog": 0, "interact": 0, "can": 0, "pip": 0, "ojd": 0, "dap": 0, "need": 0, "download": 0, "spaci": 0, "en_core_web_sm": 0, "python": 0, "m": 0, "when": 0, "packag": 0, "i": 0, "first": 0, "automat": 0, "folder": 0, "neccessari": 0, "data": 0, "file": [0, 1], "1gb": 0, "although": 0, "don": 0, "t": 0, "have": 0, "credenti": 0, "work": 0, "kei": 0, "function": 0, "advert": 0, "list": 0, "option": 0, "local": 0, "fals": [0, 1], "onli": 0, "those": 0, "access": 0, "s3": 0, "bucket": 0, "would": 0, "one": 0, "step": 0, "ar": 0, "abl": 0, "do": 0, "so": 0, "extract_skil": [0, 1], "method": 0, "ojd_daps_skil": 0, "pipelin": [0, 1], "import": 0, "extractskil": [0, 1], "modul": 0, "e": 0, "config_nam": 0, "extract_skills_toi": 0, "true": 0, "instanti": 0, "configur": [0, 1], "load": [0, 1], "necessari": 0, "job_advert": 0, "involv": 0, "commun": 0, "math": 0, "excel": 0, "good": 0, "present": 0, "exampl": 0, "job_skills_match": 0, "match": 0, "output": 0, "follow": 0, "collabor": 0, "creativ": 0, "s1": 0, "comput": 0, "s5": 0, "simpli": 0, "predicted_skil": 0, "get_skil": [0, 1], "experi": 0, "multiskil": [0, 1], "either": 0, "get_stil": 0, "In": 0, "instanc": 0, "skills_list": 0, 
"skills_list_match": 0, "map_skil": [0, 1], "format": [0, 1], "techniqu": 0, "cdef": 0, "demo": 0, "front": 0, "end": 0, "built": 0, "streamlit": 0, "given": 0, "past": 0, "ani": 0, "extract_skills_lightcast": 0, "extract_skills_esco": 0, "modifi": 0, "sourc": 0, "code": 0, "clone": 0, "run": 0, "git": 0, "github": 0, "com": 0, "nestauk": 0, "meet": 0, "scienc": 0, "cookiecutt": 0, "requir": 0, "brief": 0, "direnv": 0, "conda": 0, "creat": 0, "blank": 0, "log": 0, "mkdir": 0, "state": 0, "touch": 0, "make": 0, "environ": 0, "zip": 0, "click": 0, "here": 0, "after": 0, "unzip": 0, "move": 0, "parent": 0, "split": 0, "core": 0, "skill_ner": 0, "train": [0, 1], "entiti": [0, 1], "recognit": [0, 1], "ner": [0, 1], "skill_ner_map": 0, "an": 0, "exist": 0, "semant": 0, "similar": 0, "user": 0, "friendli": 0, "much": 0, "found": 0, "each": 0, "readm": 0, "some": 0, "check": 0, "pytest": 0, "variou": 0, "piec": 0, "done": 0, "These": 0, "dataset": [0, 1], "privat": 0, "therefor": 0, "design": 0, "technic": 0, "style": 0, "wa": 0, "made": 0, "possibl": 0, "via": 0, "fund": 0, "econom": 0, "statist": 0, "centr": 0, "templat": 0, "base": 0, "doc": 0, "skill": 1, "extractor": 1, "welcom": 1, "nesta": 1, "": 1, "librari": 1, "instal": 1, "aw": 1, "cli": 1, "tl": 1, "dr": 1, "us": 1, "1": 1, "extract": 1, "AND": 1, "map": 1, "2": 1, "3": 1, "app": 1, "develop": 1, "setup": 1, "project": 1, "structur": 1, "test": 1, "analysi": 1, "contributor": 1, "guidelin": 1, "custom": 1, "usag": 1, "predefin": 1, "definit": 1, "own": 1, "defin": 1, "summari": 1, "metric": 1, "intend": 1, "out": 1, "scope": 1, "comparison": 1, "top": 1, "group": 1, "per": 1, "essenti": 1, "degre": 1, "overlap": 1, "between": 1, "evalu": 1, "manual": 1, "judgement": 1, "posit": 1, "rate": 1, "qualiti": 1, "name": 1, "caveat": 1, "recommend": 1, "factor": 1, "label": 1, "class": 1, "format_skil": 1, "mit": 1, "licens": 1}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"skill": 0, "extractor": 0, 
"welcom": 0, "nesta": 0, "": 0, "librari": 0, "instal": 0, "name": 0, "aw": 0, "cli": 0, "tl": 0, "dr": 0, "us": 0, "usag": 0, "1": 0, "extract": 0, "AND": 0, "map": 0, "2": 0, "3": 0, "app": 0, "develop": 0, "setup": 0, "project": 0, "structur": 0, "test": 0, "analysi": 0, "contributor": 0, "guidelin": 0}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Skills Extractor": [[0, "skills-extractor"]], "Welcome to Nesta\u2019s Skills Extractor Library": [[0, "welcome-to-nesta-s-skills-extractor-library"]], "Installation <a name=\"installation\"></a>": [[0, "installation-a-name-installation-a"]], "AWS CLI": [[0, "aws-cli"]], "TL;DR: Using Nesta\u2019s Skills Extractor library <a name=\"usage\"></a>": [[0, "tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a"]], "1. Extract AND map skills": [[0, "extract-and-map-skills"]], "2. Extract skills": [[0, "extract-skills"]], "3. Map skills": [[0, "map-skills"]], "App": [[0, "app"]], "Development <a name=\"development\"></a>": [[0, "development-a-name-development-a"]], "Setup": [[0, "setup"]], "Project structure": [[0, "project-structure"]], "Testing": [[0, "testing"]], "Analysis": [[0, "analysis"]], "Contributor guidelines": [[0, "contributor-guidelines"]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({
+  docnames: [
+    "about",
+    "custom_usage",
+    "extract_skills",
+    "index",
+    "labelling",
+    "license",
+    "model_card",
+    "pipeline_summary",
+  ],
+  filenames: [
+    "about.md",
+    "custom_usage.md",
+    "extract_skills.rst",
+    "index.rst",
+    "labelling.md",
+    "license.md",
+    "model_card.md",
+    "pipeline_summary.md",
+  ],
+  titles: [
+    "Skills Extractor",
+    "Custom Usage",
+    'The <code class="docutils literal notranslate"><span class="pre">ExtractSkills</span></code> class',
+    "&lt;no title&gt;",
+    "Entity Labelling",
+    "The MIT License (MIT)",
+    "Model Cards",
+    "Pipeline summary and metrics",
+  ],
+  terms: {
+    document: [0, 1, 5],
+    thi: [0, 1, 2, 4, 5, 6, 7],
+    page: [0, 1, 6],
+    contain: [0, 1, 2, 6, 7],
+    inform: [0, 1, 6, 7],
+    how: [0, 7],
+    The: [0, 1, 3, 4, 6, 7],
+    allow: [0, 7],
+    you: [0, 1, 2, 6],
+    phrase: 0,
+    from: [0, 1, 2, 4, 5, 6, 7],
+    job: [0, 1, 2, 4, 6, 7],
+    advertis: 0,
+    text: [0, 1, 2, 6, 7],
+    them: [0, 1, 2, 4, 6],
+    onto: [0, 1, 7],
+    taxonomi: [0, 2, 3, 7],
+    your: [0, 3],
+    choic: [0, 1, 6, 7],
+    we: [0, 1, 4, 6, 7],
+    current: [0, 1, 4, 6, 7],
+    support: [0, 1, 7],
+    three: [0, 1],
+    differ: [0, 6, 7],
+    european: [0, 7],
+    commiss: [0, 7],
+    compet: [0, 7],
+    occup: [0, 3],
+    esco: [0, 1, 3, 6],
+    lightcast: [0, 1, 3],
+    open: [0, 1, 7],
+    toi: [0, 1],
+    intern: 0,
+    purpos: [0, 5],
+    If: [0, 1, 2, 6],
+    d: 0,
+    like: [0, 1, 4],
+    learn: [0, 6],
+    more: [0, 1, 4, 6, 7],
+    about: [0, 1, 4, 6],
+    model: [0, 1, 2, 3, 4, 7],
+    pleas: [0, 1, 7],
+    refer: [0, 1],
+    card: [0, 1, 3],
+    mai: [0, 1, 7],
+    also: [0, 1, 2, 7],
+    want: [0, 1, 2],
+    read: [0, 6],
+    wider: 0,
+    our: [0, 3, 4],
+    introduct: 0,
+    blog: 0,
+    interact: 0,
+    can: [0, 1, 2, 4, 6],
+    pip: 0,
+    ojd: 0,
+    dap: 0,
+    need: [0, 1, 2, 4, 6],
+    download: [0, 2],
+    spaci: 6,
+    en_core_web_sm: [],
+    python: 6,
+    m: [],
+    when: [0, 1, 2, 6, 7],
+    packag: [0, 1],
+    i: [0, 1, 2, 4, 5, 6, 7],
+    first: [0, 4, 6],
+    automat: 0,
+    folder: [0, 1],
+    neccessari: 0,
+    data: [0, 1, 2, 4, 6],
+    file: [0, 2, 3, 5, 6],
+    "1gb": 0,
+    although: 1,
+    don: 1,
+    t: [1, 4, 7],
+    have: [0, 1, 7],
+    credenti: [],
+    work: [0, 4, 6, 7],
+    kei: [0, 2],
+    function: [0, 1, 2],
+    advert: [0, 1, 2, 4, 6, 7],
+    list: [0, 1, 2, 7],
+    option: [0, 1, 2],
+    local: [0, 1, 2],
+    fals: [0, 2, 3],
+    onli: [0, 7],
+    those: 0,
+    access: [0, 2],
+    s3: [0, 2],
+    bucket: [0, 2],
+    would: [0, 7],
+    one: [0, 2, 6],
+    step: [0, 6],
+    ar: [0, 1, 2, 4, 6, 7],
+    abl: [0, 1],
+    do: [0, 1, 5, 6, 7],
+    so: [0, 1, 5, 7],
+    extract_skil: [0, 1, 2, 3],
+    method: 0,
+    ojd_daps_skil: [0, 1, 2],
+    pipelin: [0, 1, 2, 3, 4, 6],
+    import: [0, 1, 6],
+    extractskil: [0, 1, 3],
+    modul: [0, 1],
+    e: [0, 1, 4, 6, 7],
+    config_nam: [0, 1, 2],
+    extract_skills_toi: [0, 1, 2],
+    true: [0, 1, 2],
+    instanti: [0, 1],
+    configur: [0, 3, 6],
+    load: [0, 1, 2, 3],
+    necessari: [0, 2],
+    job_advert: [0, 2],
+    involv: 0,
+    commun: [0, 1],
+    math: 0,
+    excel: [0, 2, 6, 7],
+    good: [0, 2, 6],
+    present: [0, 1, 7],
+    exampl: [0, 1, 4, 6],
+    job_skills_match: 0,
+    match: [0, 1, 2, 6, 7],
+    output: [0, 2, 6],
+    follow: [0, 1, 4, 5, 6],
+    collabor: [0, 1],
+    creativ: [0, 1],
+    s1: [0, 1],
+    comput: 0,
+    s5: [0, 1],
+    simpli: 0,
+    predicted_skil: [0, 2],
+    get_skil: [0, 2, 3],
+    experi: [0, 2, 4, 6, 7],
+    multiskil: [0, 2, 3, 4, 7],
+    either: [0, 2, 7],
+    get_stil: 0,
+    In: [0, 1, 4],
+    instanc: 0,
+    skills_list: 0,
+    skills_list_match: 0,
+    map_skil: [0, 2, 3],
+    format: [0, 2, 3],
+    techniqu: [0, 1],
+    cdef: [0, 1],
+    demo: 0,
+    front: 0,
+    end: [0, 4],
+    built: 0,
+    streamlit: 0,
+    given: [0, 1, 4, 7],
+    past: 0,
+    ani: [0, 2, 5, 7],
+    extract_skills_lightcast: [0, 1],
+    extract_skills_esco: [0, 1],
+    modifi: [0, 5],
+    sourc: [0, 2, 7],
+    code: [0, 1, 2, 6],
+    clone: [0, 1],
+    run: [0, 1, 2],
+    git: 0,
+    github: 0,
+    com: 0,
+    nestauk: 0,
+    meet: 0,
+    scienc: 0,
+    cookiecutt: 0,
+    requir: 0,
+    brief: 0,
+    direnv: 0,
+    conda: 0,
+    creat: [0, 1, 4],
+    blank: [0, 1],
+    log: [0, 2],
+    mkdir: 0,
+    state: 0,
+    touch: 0,
+    make: [0, 4],
+    environ: 0,
+    zip: [],
+    click: [],
+    here: [0, 1, 6],
+    after: [],
+    unzip: [],
+    move: [],
+    parent: [],
+    split: [0, 1, 2, 6, 7],
+    core: [0, 1],
+    skill_ner: [0, 1, 4],
+    train: [0, 1, 2, 3, 7],
+    entiti: [0, 2, 3, 7],
+    recognit: [0, 3, 4, 7],
+    ner: [0, 1, 2, 3, 4, 7],
+    skill_ner_map: [0, 1],
+    an: [0, 4, 5, 6, 7],
+    exist: [0, 1, 2],
+    semant: [0, 6],
+    similar: [0, 6, 7],
+    user: [0, 4, 7],
+    friendli: 0,
+    much: 0,
+    found: [0, 2, 4, 6, 7],
+    each: [0, 1, 2, 4],
+    readm: 0,
+    some: [0, 4],
+    check: 0,
+    pytest: 0,
+    variou: [0, 1],
+    piec: 0,
+    done: 0,
+    These: [0, 1, 6],
+    dataset: [0, 2, 3, 7],
+    privat: [0, 2],
+    therefor: 0,
+    design: 0,
+    technic: 0,
+    style: 0,
+    wa: [0, 6],
+    made: 0,
+    possibl: [0, 1, 2, 6],
+    via: [0, 2],
+    fund: 0,
+    econom: 0,
+    statist: [0, 7],
+    centr: 0,
+    templat: [0, 1],
+    base: [0, 6],
+    doc: 0,
+    skill: [1, 2, 3, 4],
+    extractor: [3, 7],
+    welcom: 3,
+    nesta: [2, 3, 5, 6],
+    "": [1, 2, 3, 6],
+    librari: [3, 6],
+    instal: 3,
+    aw: [],
+    cli: [],
+    tl: 3,
+    dr: 3,
+    us: [1, 2, 3, 4, 5, 6],
+    1: [1, 2, 3, 6],
+    extract: [1, 2, 3, 4, 6],
+    AND: [3, 5],
+    map: [2, 3],
+    2: [1, 3, 6],
+    3: [1, 3, 4, 7],
+    app: 3,
+    develop: [1, 3, 6],
+    setup: [1, 3],
+    project: [1, 3],
+    structur: 3,
+    test: [1, 3, 4, 6],
+    analysi: [3, 7],
+    contributor: 3,
+    guidelin: 3,
+    custom: 3,
+    usag: 3,
+    predefin: [3, 6],
+    definit: [3, 6],
+    own: 3,
+    defin: [3, 4],
+    summari: 3,
+    metric: 3,
+    intend: 3,
+    out: [3, 4, 5, 6],
+    scope: 3,
+    comparison: 3,
+    top: [1, 3],
+    group: [1, 3, 6],
+    per: [3, 6],
+    essenti: 3,
+    degre: 3,
+    overlap: 3,
+    between: [3, 6],
+    evalu: [3, 4, 6],
+    manual: [3, 6],
+    judgement: 3,
+    posit: 3,
+    rate: 3,
+    qualiti: 3,
+    name: [2, 3, 4, 7],
+    caveat: 3,
+    recommend: 3,
+    factor: 3,
+    label: [1, 2, 3, 6],
+    class: [1, 3],
+    format_skil: [2, 3],
+    mit: 3,
+    licens: 3,
+    note: 0,
+    maco: 0,
+    ubuntu: 0,
+    chang: [0, 6],
+    been: [0, 7],
+    compat: 0,
+    window: 0,
+    system: 0,
+    cannot: 0,
+    guarante: [0, 7],
+    contribut: 0,
+    push: 0,
+    new: [0, 7],
+    branch: 0,
+    order: [0, 1, 2],
+    trigger: 0,
+    py: 1,
+    combin: [1, 2],
+    predict: [1, 2, 4, 6, 7],
+    explain: 1,
+    includ: [1, 4, 5, 7],
+    config: [1, 2, 6],
+    anoth: 1,
+    To: [1, 4],
+    repo: 1,
+    main: [1, 6],
+    instruct: 1,
+    particular: [1, 5],
+    There: [1, 4, 7],
+    avail: 1,
+    algorithm: [1, 6, 7],
+    paramet: [1, 2],
+    valu: 1,
+    directori: 1,
+    locat: 1,
+    store: 1,
+    correct: 1,
+    v1: 1,
+    version: 1,
+    22: [1, 7],
+    11: [1, 6, 7],
+    yaml: 1,
+    review: 1,
+    100: [1, 6, 7],
+    ultim: 1,
+    hard: [1, 2, 6],
+    43: [1, 6],
+    most: [1, 6],
+    common: 1,
+    which: [1, 4, 7],
+    were: [1, 4, 6, 7],
+    well: [1, 6, 7],
+    random: [1, 4, 7],
+    sampl: [1, 4, 6, 7],
+    "000": [1, 6, 7],
+    observatori: 1,
+    appropri: 1,
+    everi: [1, 2, 6],
+    descript: 1,
+    ner_model_path: 1,
+    str: [1, 2],
+    rel: [1, 2],
+    path: [1, 2],
+    span: [1, 4, 6],
+    taxonomy_nam: 1,
+    taxonomy_path: 1,
+    must: 1,
+    csv: 1,
+    clean_job_ad: 1,
+    bool: [1, 2],
+    default: [1, 2],
+    whether: [1, 2, 5, 6, 7],
+    perform: [1, 6, 7],
+    light: 1,
+    clean: 1,
+    detect: 1,
+    camelcas: 1,
+    replac: 1,
+    charact: 1,
+    convert: [1, 2],
+    bullet: 1,
+    point: 1,
+    full: 1,
+    stop: 1,
+    min_multiskill_length: 1,
+    int: 1,
+    minimum: [1, 7],
+    length: [1, 2, 6],
+    multi: [1, 2, 4, 6, 7],
+    sentenc: [1, 4, 6, 7],
+    appli: [1, 2, 7],
+    rule: [1, 6, 7],
+    taxonomy_embedding_file_nam: [1, 2],
+    embed: [1, 2, 6, 7],
+    left: [1, 2],
+    unset: [1, 2],
+    gener: [1, 2, 6],
+    prev_skill_matches_file_nam: [1, 2],
+    previou: [1, 2],
+    hard_labelled_skills_file_nam: 1,
+    hier_name_mapper_file_nam: [1, 2],
+    hierarchi: [1, 2, 7],
+    mapper: [1, 2],
+    num_hier_level: 1,
+    number: [1, 2, 6, 7],
+    level: [1, 6, 7],
+    set: [1, 2, 4, 6, 7],
+    0: [1, 6, 7],
+    ha: 1,
+    skill_type_dict: 1,
+    dict: [1, 2],
+    A: [1, 2, 5, 6],
+    dictionari: [1, 2],
+    type: [1, 2],
+    skill_typ: 1,
+    column: 1,
+    hier_typ: 1,
+    least: [1, 7],
+    granular: [1, 6],
+    match_thresholds_dict: 1,
+    threshold: [1, 6, 7],
+    For: [1, 4, 6, 7],
+    skill_match_thresh: 1,
+    7: [1, 6, 7],
+    top_tax_skil: 1,
+    5: [1, 7],
+    max_shar: 1,
+    see: [1, 6],
+    detail: [1, 6],
+    what: 1,
+    repres: 1,
+    skill_name_col: 1,
+    skill_id_col: 1,
+    id: 1,
+    row: 1,
+    should: [1, 6, 7],
+    uniqu: 1,
+    skill_hier_info_col: 1,
+    info: 1,
+    na: 1,
+    skill_type_col: 1,
+    hier: 1,
+    categori: 1,
+    subcategori: 1,
+    wai: [1, 2, 7],
+    look: [1, 4, 6, 7],
+    spreadsheet: 1,
+    softwar: [1, 5],
+    abcd: 1,
+    6: [1, 7],
+    skill_group_3: 1,
+    nan: 1,
+    mathemat: 1,
+    skill_group_2: 1,
+    4: 1,
+    becaus: 1,
+    try: [1, 6],
+    individu: [1, 2, 4, 7],
+    isn: 1,
+    correspond: 1,
+    rather: [1, 2],
+    than: [1, 2, 7],
+    show: [1, 7],
+    all: [1, 2, 5, 6, 7],
+    part: [1, 4, 6],
+    where: [1, 2, 6, 7],
+    situat: [1, 4],
+    It: [1, 2],
+    help: 1,
+    link: 1,
+    g: [1, 4, 6],
+    sinc: 1,
+    content: 1,
+    string: [1, 2, 6],
+    both: [1, 2, 6, 7],
+    provid: [1, 5, 6],
+    guidanc: 1,
+    re: 1,
+    script: 1,
+    extract_skills_taxonomy_nam: 1,
+    describ: 1,
+    save: 1,
+    your_current_path: 1,
+    against: 1,
+    second: 1,
+    third: 1,
+    level_2: 1,
+    level_3: 1,
+    now: 1,
+    my_custom_config_nam: 1,
+    verbos: 2,
+    multi_process: 2,
+    config_path: 2,
+    thei: [2, 7],
+    public: 2,
+    limit: [2, 5, 7],
+    messag: 2,
+    debug: 2,
+    multiprocess: 2,
+    none: 2,
+    hard_labelled_skills_nam: 2,
+    previous: 2,
+    jobner: 2,
+    skillmapp: 2,
+    job_adverts_skil: 2,
+    union: 2,
+    take: [2, 6],
+    input: [2, 7],
+    correctli: 2,
+    return: 2,
+    togeth: 2,
+    skill_ent: 2,
+    taxonomy_skill_nam: 2,
+    taxonomy_skill_id: 2,
+    up: 2,
+    form: [2, 4],
+    microsoft: 2,
+    equal: 2,
+    accordingli: 2,
+    same: [2, 6],
+    assum: 2,
+    treat: 2,
+    singl: [2, 4, 7],
+    arrang: 2,
+    took: 4,
+    approach: [4, 6, 7],
+    got: 4,
+    studio: 4,
+    process: 4,
+    interfac: 4,
+    task: 4,
+    tri: 4,
+    best: [4, 7],
+    start: 4,
+    verb: 4,
+    sometim: 4,
+    wasn: 4,
+    easi: 4,
+    earlier: 4,
+    might: [4, 6],
+    later: 4,
+    team: 4,
+    basi: 4,
+    could: [4, 6],
+    sens: 4,
+    without: [4, 5, 7],
+    word: [4, 6],
+    whole: 4,
+    often: 4,
+    insur: 4,
+    qualif: 4,
+    electr: 4,
+    5641: [4, 6],
+    375: [4, 6],
+    354: [4, 6],
+    4696: [4, 6],
+    608: [4, 6],
+    20: [4, 6],
+    held: [4, 6],
+    copyright: 5,
+    c: 5,
+    2022: [5, 6],
+    permiss: 5,
+    herebi: 5,
+    grant: 5,
+    free: 5,
+    charg: 5,
+    person: 5,
+    obtain: 5,
+    copi: 5,
+    associ: 5,
+    deal: 5,
+    restrict: 5,
+    right: 5,
+    merg: 5,
+    publish: 5,
+    distribut: 5,
+    sublicens: 5,
+    sell: 5,
+    permit: 5,
+    whom: 5,
+    furnish: 5,
+    subject: 5,
+    condit: 5,
+    abov: [5, 6],
+    notic: 5,
+    shall: 5,
+    substanti: 5,
+    portion: 5,
+    THE: 5,
+    AS: 5,
+    warranti: 5,
+    OF: 5,
+    kind: 5,
+    express: 5,
+    OR: 5,
+    impli: 5,
+    BUT: 5,
+    NOT: 5,
+    TO: 5,
+    merchant: 5,
+    fit: 5,
+    FOR: 5,
+    noninfring: 5,
+    IN: 5,
+    NO: 5,
+    event: 5,
+    author: 5,
+    holder: 5,
+    BE: 5,
+    liabl: 5,
+    claim: 5,
+    damag: 5,
+    other: [5, 7],
+    liabil: 5,
+    action: 5,
+    contract: 5,
+    tort: 5,
+    otherwis: [5, 7],
+    aris: 5,
+    connect: [5, 6],
+    WITH: 5,
+    two: [6, 7],
+    scientist: 6,
+    analyt: 6,
+    practic: [6, 7],
+    last: 6,
+    updat: 6,
+    23: 6,
+    compon: 6,
+    scikit: 6,
+    svm: [6, 7],
+    featur: 6,
+    As: 6,
+    "15th": 6,
+    novemb: 6,
+    ojo: 6,
+    neural: 6,
+    network: 6,
+    architectur: 6,
+    Their: 6,
+    sophist: 6,
+    strategi: 6,
+    subword: 6,
+    bloom: 6,
+    deep: 6,
+    convolut: 6,
+    residu: 6,
+    novel: 6,
+    transit: 6,
+    pars: 6,
+    creation: 6,
+    nerevalu: 6,
+    calcul: [6, 7],
+    f1: 6,
+    precis: 6,
+    recal: 6,
+    classifi: 6,
+    result: [6, 7],
+    586: 6,
+    679: 6,
+    515: 6,
+    506: 6,
+    648: 6,
+    416: 6,
+    563: 6,
+    643: 6,
+    500: 6,
+    partial: 6,
+    across: 6,
+    ner_model: 6,
+    20220825: 6,
+    train_detail: 6,
+    json: 6,
+    On: [6, 7],
+    achiev: 6,
+    91: 6,
+    accuraci: 6,
+    splitter: 6,
+    253: 6,
+    ok: [6, 7],
+    bad: 6,
+    Of: [6, 7],
+    80: 6,
+    66: 6,
+    9: [6, 7],
+    25: 6,
+    mani: [6, 7],
+    get: 6,
+    unabl: 6,
+    still: 6,
+    futur: 6,
+    add: 6,
+    dedupl: [6, 7],
+    mean: 6,
+    mention: 6,
+    twice: 6,
+    just: 6,
+    onc: 6,
+    howev: [6, 7],
+    slightli: 6,
+    occurr: 6,
+    specif: 6,
+    disambigu: 6,
+    acronym: 6,
+    improv: 6,
+    methodologi: 6,
+    cosin: [6, 7],
+    huggingfac: [6, 7],
+    transform: [6, 7],
+    minilm: [6, 7],
+    l6: [6, 7],
+    v2: [6, 7],
+    close: 6,
+    assign: 6,
+    maximum: 6,
+    share: 6,
+    diagram: 6,
+    commonli: 6,
+    occur: 6,
+    doe: 6,
+    less: 6,
+    metaphor: 6,
+    understand: [6, 7],
+    bigger: 6,
+    pictur: 6,
+    draft: 6,
+    orsa: 6,
+    fine: 6,
+    art: 6,
+    poor: 6,
+    assist: [6, 7],
+    peopl: 6,
+    ensur: 6,
+    highest: 6,
+    unmatch: 6,
+    under: 6,
+    identifi: [6, 7],
+    approxim: 6,
+    relev: 6,
+    tune: 6,
+    high: 7,
+    overal: 7,
+    vector: 7,
+    machin: 7,
+    emb: 7,
+    pre: 7,
+    further: 7,
+    feedback: 7,
+    contact: 7,
+    liz: 7,
+    gallagh: 7,
+    india: 7,
+    kerl: 7,
+    cath: 7,
+    sleeman: 7,
+    onlin: 7,
+    research: 7,
+    labour: 7,
+    relat: 7,
+    govern: 7,
+    bodi: 7,
+    non: 7,
+    english: 7,
+    languag: 7,
+    draw: 7,
+    conclus: 7,
+    unidentifi: 7,
+    determin: 7,
+    demand: 7,
+    expert: 7,
+    steer: 7,
+    nor: 7,
+    discriminatori: 7,
+    hire: 7,
+    exact: 7,
+    sever: 7,
+    proxi: 7,
+    better: 7,
+    compar: 7,
+    75: 7,
+    percentil: 7,
+    count: 7,
+    At: 7,
+    find: 7,
+    58: 7,
+    averag: 7,
+    345: 7,
+    54: 7,
+    transvers: 7,
+    addit: 7,
+    94: 7,
+    percent: 7,
+    median: 7,
+    97: 7,
+    84: 7,
+    99: 7,
+    10: 7,
+    while: 7,
+    42: 7,
+    40: 7,
+    39: 7,
+    percentag: 7,
+    33: 7,
+    qualit: 7,
+    veri: 7,
+    financi: 7,
+    account: 7,
+    v: 7,
+    financ: 7,
+    consid: 7,
+    inappropri: 7,
+    ey: 7,
+    examin: 7,
+    suppli: 7,
+    chain: 7,
+    role: 7,
+    truth: 7,
+    floor: 7,
+    care: 7,
+    64: 7,
+    judg: 7,
+    shouldn: 7,
+    api: 7,
+    27: 7,
+    12: 7,
+    tag: 7,
+    thought: 7,
+    had: 7,
+    felt: 7,
+    183: 7,
+    73: 7,
+    19: 7,
+    8: 7,
+    172: 7,
+    53: 7,
+    30: 7,
+    17: 7,
+    16: 7,
+    35: 7,
+    83: 7,
+    87: 7,
+    lowest: 7,
+    its: 7,
+    score: 7,
+    71: 7,
+    51: 7,
+    90: 7,
+    attitud: 7,
+    63: 7,
+    knoweldg: 7,
+    "00": 7,
+  },
+  objects: {
+    "ojd_daps_skills.pipeline.extract_skills.extract_skills": [
+      [2, 0, 1, "", "ExtractSkills"],
+    ],
+    "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills": [
+      [2, 1, 1, "", "extract_skills"],
+      [2, 1, 1, "", "format_skills"],
+      [2, 1, 1, "", "get_skills"],
+      [2, 1, 1, "", "load"],
+      [2, 1, 1, "", "map_skills"],
+    ],
+  },
+  objtypes: { 0: "py:class", 1: "py:method" },
+  objnames: {
+    0: ["py", "class", "Python class"],
+    1: ["py", "method", "Python method"],
+  },
+  titleterms: {
+    skill: [0, 6, 7],
+    extractor: 0,
+    welcom: 0,
+    nesta: 0,
+    "": [0, 7],
+    librari: 0,
+    instal: 0,
+    name: [0, 1, 6],
+    aw: [],
+    cli: [],
+    tl: 0,
+    dr: 0,
+    us: [0, 7],
+    usag: [0, 1],
+    1: [0, 7],
+    extract: [0, 7],
+    AND: 0,
+    map: [0, 1, 6, 7],
+    2: [0, 7],
+    3: 0,
+    app: 0,
+    develop: 0,
+    setup: 0,
+    project: 0,
+    structur: 0,
+    test: 0,
+    analysi: 0,
+    contributor: 0,
+    guidelin: 0,
+    custom: 1,
+    configur: 1,
+    file: 1,
+    config_fil: 1,
+    predefin: 1,
+    predefined_config: 1,
+    definit: 1,
+    config_def: 1,
+    your: 1,
+    own: 1,
+    taxonomi: [1, 6],
+    format: 1,
+    format_tax: 1,
+    defin: 1,
+    custom_config: 1,
+    The: [2, 5],
+    extractskil: 2,
+    class: 2,
+    entiti: [4, 6],
+    label: 4,
+    train: [4, 6],
+    dataset: 4,
+    mit: 5,
+    licens: 5,
+    model: 6,
+    card: 6,
+    recognit: 6,
+    extract_skills_card: 6,
+    summari: [6, 7],
+    ner: 6,
+    metric: [6, 7],
+    multiskil: 6,
+    caveat: 6,
+    recommend: 6,
+    mapping_card: 6,
+    factor: 6,
+    pipelin: 7,
+    intend: 7,
+    out: 7,
+    scope: 7,
+    comparison: 7,
+    top: 7,
+    group: 7,
+    per: 7,
+    occup: 7,
+    esco: 7,
+    essenti: 7,
+    degre: 7,
+    overlap: 7,
+    between: 7,
+    lightcast: 7,
+    our: 7,
+    evalu: 7,
+    manual: 7,
+    judgement: 7,
+    fals: 7,
+    posit: 7,
+    rate: 7,
+    qualiti: 7,
+  },
+  envversion: {
+    "sphinx.domains.c": 2,
+    "sphinx.domains.changeset": 1,
+    "sphinx.domains.citation": 1,
+    "sphinx.domains.cpp": 8,
+    "sphinx.domains.index": 1,
+    "sphinx.domains.javascript": 2,
+    "sphinx.domains.math": 2,
+    "sphinx.domains.python": 3,
+    "sphinx.domains.rst": 2,
+    "sphinx.domains.std": 2,
+    "sphinx.ext.viewcode": 1,
+    sphinx: 57,
+  },
+  alltitles: {
+    "Skills Extractor": [[0, "skills-extractor"]],
+    "Welcome to Nesta\u2019s Skills Extractor Library": [
+      [0, "welcome-to-nesta-s-skills-extractor-library"],
+    ],
+    'Installation <a name="installation"></a>': [[0, "installation"]],
+    'TL;DR: Using Nesta\u2019s Skills Extractor library <a name="usage"></a>': [
+      [0, "tl-dr-using-nesta-s-skills-extractor-library"],
+    ],
+    "1. Extract AND map skills": [[0, "extract-and-map-skills"]],
+    "2. Extract skills": [[0, "extract-skills"]],
+    "3. Map skills": [[0, "map-skills"]],
+    App: [[0, "app"]],
+    'Development <a name="development"></a>': [[0, "development"]],
+    Setup: [[0, "setup"]],
+    "Project structure": [[0, "project-structure"]],
+    Testing: [[0, "testing"]],
+    Analysis: [[0, "analysis"]],
+    "Contributor guidelines": [[0, "contributor-guidelines"]],
+    "Custom Usage": [[1, "custom-usage"]],
+    'Configuration files <a name="config_files"></a>': [
+      [1, "configuration-files"],
+    ],
+    'Predefined configurations <a name="predefined_config"></a>': [
+      [1, "predefined-configurations"],
+    ],
+    'Configuration definitions <a name="config_defs"></a>': [
+      [1, "configuration-definitions"],
+    ],
+    'Mapping to your own taxonomy <a name="mapping"></a>': [
+      [1, "mapping-to-your-own-taxonomy"],
+    ],
+    'Format your taxonomy <a name="format_tax"></a>': [
+      [1, "format-your-taxonomy"],
+    ],
+    'Define your own configuration file <a name="custom_config"></a>': [
+      [1, "define-your-own-configuration-file"],
+    ],
+    "The ExtractSkills class": [[2, "the-extractskills-class"]],
+    "Entity Labelling": [[4, "entity-labelling"]],
+    "Training dataset": [[4, "training-dataset"]],
+    "The MIT License (MIT)": [[5, "the-mit-license-mit"]],
+    "Model Cards": [[6, "model-cards"]],
+    'Model Card: Named Entity Recognition Model <a name="extract_skills_card"></a>': [
+      [6, "model-card-named-entity-recognition-model"],
+    ],
+    Summary: [
+      [6, "summary"],
+      [6, "id1"],
+    ],
+    Training: [[6, "training"]],
+    "NER Metrics": [[6, "ner-metrics"]],
+    "Multiskill Metrics": [[6, "multiskill-metrics"]],
+    "Caveats and Recommendations": [
+      [6, "caveats-and-recommendations"],
+      [6, "id2"],
+    ],
+    'Model Card: Skills to Taxonomy Mapping <a name="mapping_card"></a>': [
+      [6, "model-card-skills-to-taxonomy-mapping"],
+    ],
+    "Model Factors": [[6, "model-factors"]],
+    "Pipeline summary and metrics": [[7, "pipeline-summary-and-metrics"]],
+    "Intended Use": [[7, "intended-use"]],
+    "Out of Scope Uses": [[7, "out-of-scope-uses"]],
+    Metrics: [[7, "metrics"]],
+    "Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation": [
+      [
+        7,
+        "comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation",
+      ],
+    ],
+    "Comparison 2 - Degree of overlap between Lightcast\u2019s extracted skills and our Lightcast skills": [
+      [
+        7,
+        "comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills",
+      ],
+    ],
+    "Evaluation 1 - Manual judgement of false positive rate": [
+      [7, "evaluation-1-manual-judgement-of-false-positive-rate"],
+    ],
+    "Evaluation 2 - Manual judgement of skills extraction and mapping quality": [
+      [
+        7,
+        "evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality",
+      ],
+    ],
+  },
+  indexentries: {
+    "extractskills (class in ojd_daps_skills.pipeline.extract_skills.extract_skills)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills",
+      ],
+    ],
+    "extract_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills",
+      ],
+    ],
+    "format_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills",
+      ],
+    ],
+    "get_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills",
+      ],
+    ],
+    "load() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load",
+      ],
+    ],
+    "map_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [
+      [
+        2,
+        "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills",
+      ],
+    ],
+  },
+});
diff --git a/ojd_daps_skills/app/requirements_app.txt b/ojd_daps_skills/app/requirements_app.txt
index d1d944f6..48a229aa 100644
--- a/ojd_daps_skills/app/requirements_app.txt
+++ b/ojd_daps_skills/app/requirements_app.txt
@@ -1,4 +1,2 @@
-numpy==1.21.1
 ojd-daps-skills
-awscli==1.27.25
-streamlit== 1.16.0
\ No newline at end of file
+streamlit==1.16.0
diff --git a/ojd_daps_skills/getters/download_public_data.py b/ojd_daps_skills/getters/download_public_data.py
index a04656bc..9de3e80b 100644
--- a/ojd_daps_skills/getters/download_public_data.py
+++ b/ojd_daps_skills/getters/download_public_data.py
@@ -1,30 +1,48 @@
 from ojd_daps_skills import PUBLIC_DATA_FOLDER_NAME, PROJECT_DIR
 
 import os
-import platform
-import zipfile
+import boto3
+from botocore.exceptions import ClientError
+from botocore import UNSIGNED
+from botocore.config import Config
+from zipfile import ZipFile
 
 def download():
+    """Download and unpack the public data archive into ``PROJECT_DIR``.
+
+    Fetches ``escoe_extension/<PUBLIC_DATA_FOLDER_NAME>.zip`` from the
+    ``open-jobs-indicators`` S3 bucket using unsigned requests. Expected to
+    run once on first use.
+
+    ``ClientError`` and ``FileNotFoundError`` are printed rather than raised.
+    The downloaded zip file is removed afterwards, whether or not extraction
+    succeeded.
+    """
+    s3 = boto3.client(
+        "s3", region_name="eu-west-1", config=Config(signature_version=UNSIGNED)
+    )
+
+    bucket_name = "open-jobs-indicators"
+    key = f"escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip"
 
     public_data_dir = os.path.join(PROJECT_DIR, PUBLIC_DATA_FOLDER_NAME)
+    zip_path = f"{public_data_dir}.zip"
 
-    if platform.system() == "Windows":
-      os.system(
-        f'aws --no-sign-request --region=eu-west-1 s3 cp s3://open-jobs-indicators/escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip "{public_data_dir}.zip"'
-      )
-      with zipfile.ZipFile(f"{public_data_dir}.zip", 'r') as zip_ref:
-        zip_ref.extractall(f"{PROJECT_DIR}")
-        zip_ref.close()
-        os.remove(f"{public_data_dir}.zip")
-      
-      return
+    try:
+        s3.download_file(bucket_name, key, zip_path)
+
+        with ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall(PROJECT_DIR)
 
-    os.system(
-      f"aws --no-sign-request --region=eu-west-1 s3 cp s3://open-jobs-indicators/escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip {public_data_dir}.zip"
-    )
-    os.system(f"unzip {public_data_dir}.zip -d {PROJECT_DIR}")
-    os.system(f"rm {public_data_dir}.zip")
-    
+    except ClientError as ce:
+        print(f"Error: {ce}")
+    except FileNotFoundError as fnfe:
+        print(f"Error: {fnfe}")
+    finally:
+        # Clean up the archive even when the download or extraction failed,
+        # so a broken zip is never left behind in the project directory.
+        if os.path.exists(zip_path):
+            os.remove(zip_path)
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..a6281427
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,8 @@
+[build-system]
+requires = [
+    "setuptools>=64",
+    "setuptools_scm>=8",
+]
+
+[tool.setuptools_scm]
+version_scheme = "release-branch-semver"
diff --git a/requirements.txt b/requirements.txt
index 02d6ff95..d02fdea5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,16 @@
-numpy==1.22.4
-scipy==1.8.1
+numpy==1.24.4
+scipy==1.10.1
 pandas==1.3.5
 tqdm==4.64.0
 filelock==3.7.1
 typer==0.4.1
 sh==1.14.2
-transformers==4.20.1
+transformers==4.33.3
 sentence-transformers==2.2.2
-scikit-learn==0.23.2
+scikit-learn==1.3.1
 spacy==3.4.0
 nervaluate==0.1.8
 s3fs==2022.5.0
 boto3==1.21.21
 toolz==0.12.0
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1.tar.gz
diff --git a/requirements_dev.txt b/requirements_dev.txt
index e3ec023d..9124b614 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -9,7 +9,6 @@ black
 Sphinx
 sphinxcontrib-napoleon
 sphinx-rtd-theme
-awscli==1.27.32
 pre-commit
 pre-commit-hooks
 spacy==3.4.0
diff --git a/setup.py b/setup.py
index d66c0f72..ea57b476 100644
--- a/setup.py
+++ b/setup.py
@@ -2,17 +2,7 @@
 from pathlib import Path
 from setuptools import find_packages
 from setuptools import setup
-
-import os
-import platform
-import subprocess
-
-
-tag_cmd = "git describe --tags --abbrev=0"
-tag_cmd = tag_cmd if platform.system() == "Windows" else f"echo $({tag_cmd})"
-tag_version = (
-    subprocess.check_output(tag_cmd, shell=True).decode("ascii").replace("\n", "")
-)
+import setuptools_scm
 
 
 def read_lines(path):
@@ -26,21 +16,21 @@ def read_lines(path):
 
 setup(
     name="ojd_daps_skills",
-    long_description=open(os.path.join(BASE_DIR, "README.md"), encoding="utf-8").read(),
+    long_description=open(BASE_DIR / "README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
-    install_requires=read_lines(os.path.join(BASE_DIR, "requirements.txt")),
-    extras_require={"dev": read_lines(os.path.join(BASE_DIR, "requirements_dev.txt"))},
+    install_requires=read_lines(BASE_DIR / "requirements.txt"),
+    extras_require={"dev": read_lines(BASE_DIR / "requirements_dev.txt")},
     packages=find_packages(
         exclude=["docs", "ojd_daps_skills/analysis", "ojd_daps_skills/app"]
     ),
-    classifiers=['Development Status :: 5 - Production/Stable'],
+    classifiers=["Development Status :: 5 - Production/Stable"],
     package_data={
         # If any package contains *.yaml files, include them:
         "": [
             "*.yaml",
         ],
     },
-    version=tag_version,
+    version=setuptools_scm.get_version(),
     description="Extract skills from job ads and maps them onto a skills taxonomy of your choice.",
     url="https://github.com/nestauk/ojd_daps_skills",
     project_urls={

Parameter	Description
`ner_model_path`: str	The relative path to the NER model folder used to predict skill spans in job adverts.
`taxonomy_name`: str	The name of the taxonomy to map onto.
`taxonomy_path`: str	The relative path to the formatted taxonomy. Formatted taxonomy must be in `.csv` format.
`clean_job_ads`: bool, default=True	Whether to perform light text cleaning on job adverts or not. Text cleaning includes detecting and splitting camelcase in job adverts, replacing various characters and converting bullet points to full stops. Defaults to True.
`min_multiskill_length`: int	The minimum character length a predicted multi-skill sentence must be to apply splitting rules to.
(optional) `taxonomy_embedding_file_name`: str	The relative path to a taxonomy embedding file if it exists. If left unset the embeddings will be generated when the code is run.
(optional) `prev_skill_matches_file_name`: str	The relative path to a previous skill matches file if it exists.
(optional) `hard_labelled_skills_file_name`: str	The relative path to a hard labelled skills file if it exists.
(optional) `hier_name_mapper_file_name`: str	The relative path to a hierarchy name mapper file if it exists.
`num_hier_levels`: int	The number of levels in the skills taxonomy hierarchy. This can be set to 0 if the taxonomy has no levels.
`skill_type_dict`: dict	A dictionary that defines skill types and hierarchy types. `{"skill_types": [A list of the values of the 'type' column which code skills], "hier_types": [A list of the values of the 'type' column which code skill groups, these need to be in order from least to most granular]}`
`match_thresholds_dict`: dict	A dictionary that defines thresholds at each level of the skills taxonomy hierarchy. For example, `{"skill_match_thresh": 0.7, "top_tax_skills": {1: 0.5, 2: 0.5, 3: 0.5}, "max_share": {1: 0, 2: 0.2, 3: 0.2}}` See Model Card: Skills to Taxonomy Mapping for the details of what these thresholds represent.
`skill_name_col`: str	The name of the skill/hierarchy level description text column in formatted taxonomy `.csv`.
`skill_id_col`: str	Name of skill id column in formatted taxonomy `.csv`. Each row should contain a unique ID for the skill/hierarchy.
(optional) `skill_hier_info_col`: str	Name of hierarchy info column in formatted taxonomy `.csv`. The hierarchy info column contains which hierarchy levels a skill is in (from least to most granular). If not a skill, then NA.
`skill_type_col`: str	Name of the column in formatted taxonomy `.csv` that states which type (e.g. category, subcategory) the skill/hier description belongs to.
skill_type_col	skill_name_col	skill_id_col	(optional) skill_hier_info_col
skill	use spreadsheets software	abcd	`[["S", "S5", "S5.6", "S5.6.1"], ["S", "S5", "S5.5", "S5.5.2"]]`
skill	use communication techniques	cdef	`[["S", "S1", "S1.0", "S1.0.0"]]`
skill_group_3	communication, collaboration and creativity	S1.0.0	NaN
skill_group_3	mathematics	S1.2.1	NaN
skill_group_2	presenting information	S1.4	NaN