diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 7a4aab61..ffd85a25 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -4,10 +4,15 @@ on: [push] jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os.host }} strategy: matrix: python-version: ["3.8", "3.9"] + os: + - name: ubuntu + host: ubuntu-latest + - name: windows + host: windows-latest steps: - uses: actions/checkout@v3 @@ -15,12 +20,30 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + cache: "pip" + + - name: (ubuntu) Install dependencies + if: runner.os != 'windows' run: | python -m pip install --upgrade pip pip install -r requirements_dev.txt python -m spacy download en_core_web_sm pip install -e ."[test]" - - name: Test with pytest + - name: (ubuntu) Test with pytest + if: runner.os != 'windows' + run: | + pytest --verbose + + - name: (windows) Install dependencies + if: runner.os == 'windows' + shell: bash + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + python -m spacy download en_core_web_sm + pip install -e . + - name: (windows) Test with pytest + if: runner.os == 'windows' + shell: bash run: | pytest --verbose diff --git a/README.md b/README.md index 17635247..f2b055a1 100644 --- a/README.md +++ b/README.md @@ -29,16 +29,9 @@ You can use pip to install the library: pip install ojd-daps-skills ``` -You will also need to download [spaCy's](https://spacy.io/models/en) `en_core_web_sm` model: +Note that this package was developed on MacOS and tested on Ubuntu. Changes have been made to be compatible on a Windows system but are not tested and cannot be guaranteed. -``` -python -m spacy download en_core_web_sm -``` - -Note that this package was developed on MacOS and tested on Ubuntu. Changes have been made to be compatible on a Windows system but are not tested and cannot be guaranteed. 
-### AWS CLI - -When the package is first used it will automatically download a folder of neccessary data and models. This file is ~ 1GB. Although you don't need to have AWS credentials for this to work, you will need to download the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). +When the package is first used it will automatically download a folder of necessary data and models (~1GB). ## TL;DR: Using Nesta's Skills Extractor library @@ -152,10 +145,6 @@ git clone git@github.com:nestauk/ojd_daps_skills.git - `mkdir .cookiecutter/state` - `touch .cookiecutter/state/conda-create.log` - Run `make install` to configure the development environment -- Download spacy model: - - `python -m spacy download en_core_web_sm` - -If you don't have the AWS CLI installed - you can download a zipped folder of the data [by clicking here](https://open-jobs-indicators.s3.eu-west-1.amazonaws.com/escoe_extension/ojd_daps_skills_data.zip). After downloading and unzipping, it is important that this folder is moved to the project's parent folder - i.e. `ojd_daps_skills/`. 
### Project structure diff --git a/docs/build/doctrees/about.doctree b/docs/build/doctrees/about.doctree index 55411246..cf681ec1 100644 Binary files a/docs/build/doctrees/about.doctree and b/docs/build/doctrees/about.doctree differ diff --git a/docs/build/doctrees/custom_usage.doctree b/docs/build/doctrees/custom_usage.doctree index bf0602a2..f0b13a47 100644 Binary files a/docs/build/doctrees/custom_usage.doctree and b/docs/build/doctrees/custom_usage.doctree differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 230395d8..8dc0b74c 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/extract_skills.doctree b/docs/build/doctrees/extract_skills.doctree index e391247d..79340b72 100644 Binary files a/docs/build/doctrees/extract_skills.doctree and b/docs/build/doctrees/extract_skills.doctree differ diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree index 2817368f..a64fc3b7 100644 Binary files a/docs/build/doctrees/index.doctree and b/docs/build/doctrees/index.doctree differ diff --git a/docs/build/doctrees/labelling.doctree b/docs/build/doctrees/labelling.doctree index 0c44bbde..989b3f01 100644 Binary files a/docs/build/doctrees/labelling.doctree and b/docs/build/doctrees/labelling.doctree differ diff --git a/docs/build/doctrees/license.doctree b/docs/build/doctrees/license.doctree index 50004c37..104b29ed 100644 Binary files a/docs/build/doctrees/license.doctree and b/docs/build/doctrees/license.doctree differ diff --git a/docs/build/doctrees/model_card.doctree b/docs/build/doctrees/model_card.doctree index ccce3340..22f5fca3 100644 Binary files a/docs/build/doctrees/model_card.doctree and b/docs/build/doctrees/model_card.doctree differ diff --git a/docs/build/doctrees/pipeline_summary.doctree b/docs/build/doctrees/pipeline_summary.doctree index 84f1a440..0158cb91 100644 Binary files 
a/docs/build/doctrees/pipeline_summary.doctree and b/docs/build/doctrees/pipeline_summary.doctree differ diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css index eeb0519a..1c79f9b4 100644 --- a/docs/build/html/_static/basic.css +++ b/docs/build/html/_static/basic.css @@ -12,241 +12,233 @@ /* -- main layout ----------------------------------------------------------- */ div.clearer { - clear: both; + clear: both; } div.section::after { - display: block; - content: ''; - clear: left; + display: block; + content: ""; + clear: left; } /* -- relbar ---------------------------------------------------------------- */ div.related { - width: 100%; - font-size: 90%; + width: 100%; + font-size: 90%; } div.related h3 { - display: none; + display: none; } div.related ul { - margin: 0; - padding: 0 0 0 10px; - list-style: none; + margin: 0; + padding: 0 0 0 10px; + list-style: none; } div.related li { - display: inline; + display: inline; } div.related li.right { - float: right; - margin-right: 5px; + float: right; + margin-right: 5px; } /* -- sidebar --------------------------------------------------------------- */ div.sphinxsidebarwrapper { - padding: 10px 5px 0 10px; + padding: 10px 5px 0 10px; } div.sphinxsidebar { - float: left; - width: 230px; - margin-left: -100%; - font-size: 90%; - word-wrap: break-word; - overflow-wrap : break-word; + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap: break-word; } div.sphinxsidebar ul { - list-style: none; + list-style: none; } div.sphinxsidebar ul ul, div.sphinxsidebar ul.want-points { - margin-left: 20px; - list-style: square; + margin-left: 20px; + list-style: square; } div.sphinxsidebar ul ul { - margin-top: 0; - margin-bottom: 0; + margin-top: 0; + margin-bottom: 0; } div.sphinxsidebar form { - margin-top: 10px; + margin-top: 10px; } div.sphinxsidebar input { - border: 1px solid #98dbcc; - font-family: sans-serif; - font-size: 1em; + 
border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; } div.sphinxsidebar #searchbox form.search { - overflow: hidden; + overflow: hidden; } div.sphinxsidebar #searchbox input[type="text"] { - float: left; - width: 80%; - padding: 0.25em; - box-sizing: border-box; + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; } div.sphinxsidebar #searchbox input[type="submit"] { - float: left; - width: 20%; - border-left: none; - padding: 0.25em; - box-sizing: border-box; + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; } - img { - border: 0; - max-width: 100%; + border: 0; + max-width: 100%; } /* -- search page ----------------------------------------------------------- */ ul.search { - margin: 10px 0 0 20px; - padding: 0; + margin: 10px 0 0 20px; + padding: 0; } ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; } ul.search li a { - font-weight: bold; + font-weight: bold; } ul.search li p.context { - color: #888; - margin: 2px 0 0 30px; - text-align: left; + color: #888; + margin: 2px 0 0 30px; + text-align: left; } ul.keywordmatches li.goodmatch a { - font-weight: bold; + font-weight: bold; } /* -- index page ------------------------------------------------------------ */ table.contentstable { - width: 90%; - margin-left: auto; - margin-right: auto; + width: 90%; + margin-left: auto; + margin-right: auto; } table.contentstable p.biglink { - line-height: 150%; + line-height: 150%; } a.biglink { - font-size: 1.3em; + font-size: 1.3em; } span.linkdescr { - font-style: italic; - padding-top: 5px; - font-size: 90%; + font-style: italic; + padding-top: 5px; + font-size: 90%; } /* -- general index --------------------------------------------------------- */ table.indextable { - width: 
100%; + width: 100%; } table.indextable td { - text-align: left; - vertical-align: top; + text-align: left; + vertical-align: top; } table.indextable ul { - margin-top: 0; - margin-bottom: 0; - list-style-type: none; + margin-top: 0; + margin-bottom: 0; + list-style-type: none; } table.indextable > tbody > tr > td > ul { - padding-left: 0em; + padding-left: 0em; } table.indextable tr.pcap { - height: 10px; + height: 10px; } table.indextable tr.cap { - margin-top: 10px; - background-color: #f2f2f2; + margin-top: 10px; + background-color: #f2f2f2; } img.toggler { - margin-right: 3px; - margin-top: 3px; - cursor: pointer; + margin-right: 3px; + margin-top: 3px; + cursor: pointer; } div.modindex-jumpbox { - border-top: 1px solid #ddd; - border-bottom: 1px solid #ddd; - margin: 1em 0 1em 0; - padding: 0.4em; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; } div.genindex-jumpbox { - border-top: 1px solid #ddd; - border-bottom: 1px solid #ddd; - margin: 1em 0 1em 0; - padding: 0.4em; + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; } /* -- domain module index --------------------------------------------------- */ table.modindextable td { - padding: 2px; - border-collapse: collapse; + padding: 2px; + border-collapse: collapse; } /* -- general body styles --------------------------------------------------- */ div.body { - min-width: 360px; - max-width: 800px; + min-width: 360px; + max-width: 800px; } -div.body p, div.body dd, div.body li, div.body blockquote { - -moz-hyphens: auto; - -ms-hyphens: auto; - -webkit-hyphens: auto; - hyphens: auto; +div.body p, +div.body dd, +div.body li, +div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; } a.headerlink { - visibility: hidden; -} -a.brackets:before, -span.brackets > a:before{ - content: "["; + visibility: hidden; } -a.brackets:after, -span.brackets > a:after { - 
content: "]"; -} - - h1:hover > a.headerlink, h2:hover > a.headerlink, h3:hover > a.headerlink, @@ -257,213 +249,237 @@ dt:hover > a.headerlink, caption:hover > a.headerlink, p.caption:hover > a.headerlink, div.code-block-caption:hover > a.headerlink { - visibility: visible; + visibility: visible; } div.body p.caption { - text-align: inherit; + text-align: inherit; } div.body td { - text-align: left; + text-align: left; } .first { - margin-top: 0 !important; + margin-top: 0 !important; } p.rubric { - margin-top: 30px; - font-weight: bold; + margin-top: 30px; + font-weight: bold; } -img.align-left, figure.align-left, .figure.align-left, object.align-left { - clear: left; - float: left; - margin-right: 1em; +img.align-left, +figure.align-left, +.figure.align-left, +object.align-left { + clear: left; + float: left; + margin-right: 1em; } -img.align-right, figure.align-right, .figure.align-right, object.align-right { - clear: right; - float: right; - margin-left: 1em; +img.align-right, +figure.align-right, +.figure.align-right, +object.align-right { + clear: right; + float: right; + margin-left: 1em; } -img.align-center, figure.align-center, .figure.align-center, object.align-center { +img.align-center, +figure.align-center, +.figure.align-center, +object.align-center { display: block; margin-left: auto; margin-right: auto; } -img.align-default, figure.align-default, .figure.align-default { +img.align-default, +figure.align-default, +.figure.align-default { display: block; margin-left: auto; margin-right: auto; } .align-left { - text-align: left; + text-align: left; } .align-center { - text-align: center; + text-align: center; } .align-default { - text-align: center; + text-align: center; } .align-right { - text-align: right; + text-align: right; } /* -- sidebars -------------------------------------------------------------- */ div.sidebar, aside.sidebar { - margin: 0 0 0.5em 1em; - border: 1px solid #ddb; - padding: 7px; - background-color: #ffe; - width: 40%; - 
float: right; - clear: right; - overflow-x: auto; + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; } p.sidebar-title { - font-weight: bold; + font-weight: bold; } -div.admonition, div.topic, blockquote { - clear: left; +nav.contents, +aside.topic, +div.admonition, +div.topic, +blockquote { + clear: left; } /* -- topics ---------------------------------------------------------------- */ +nav.contents, +aside.topic, div.topic { - border: 1px solid #ccc; - padding: 7px; - margin: 10px 0 10px 0; + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; } p.topic-title { - font-size: 1.1em; - font-weight: bold; - margin-top: 10px; + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; } /* -- admonitions ----------------------------------------------------------- */ div.admonition { - margin-top: 10px; - margin-bottom: 10px; - padding: 7px; + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; } div.admonition dt { - font-weight: bold; + font-weight: bold; } p.admonition-title { - margin: 0px 10px 5px 0px; - font-weight: bold; + margin: 0px 10px 5px 0px; + font-weight: bold; } div.body p.centered { - text-align: center; - margin-top: 25px; + text-align: center; + margin-top: 25px; } /* -- content of sidebars/topics/admonitions -------------------------------- */ div.sidebar > :last-child, aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, div.topic > :last-child, div.admonition > :last-child { - margin-bottom: 0; + margin-bottom: 0; } div.sidebar::after, aside.sidebar::after, +nav.contents::after, +aside.topic::after, div.topic::after, div.admonition::after, blockquote::after { - display: block; - content: ''; - clear: both; + display: block; + content: ""; + clear: both; } /* -- tables ---------------------------------------------------------------- */ table.docutils { - margin-top: 10px; - margin-bottom: 
10px; - border: 0; - border-collapse: collapse; + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; } table.align-center { - margin-left: auto; - margin-right: auto; + margin-left: auto; + margin-right: auto; } table.align-default { - margin-left: auto; - margin-right: auto; + margin-left: auto; + margin-right: auto; } table caption span.caption-number { - font-style: italic; + font-style: italic; } table caption span.caption-text { } -table.docutils td, table.docutils th { - padding: 1px 8px 1px 5px; - border-top: 0; - border-left: 0; - border-right: 0; - border-bottom: 1px solid #aaa; +table.docutils td, +table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; } th { - text-align: left; - padding-right: 5px; + text-align: left; + padding-right: 5px; } table.citation { - border-left: solid 1px gray; - margin-left: 1px; + border-left: solid 1px gray; + margin-left: 1px; } table.citation td { - border-bottom: none; + border-bottom: none; } th > :first-child, td > :first-child { - margin-top: 0px; + margin-top: 0px; } th > :last-child, td > :last-child { - margin-bottom: 0px; + margin-bottom: 0px; } /* -- figures --------------------------------------------------------------- */ -div.figure, figure { - margin: 0.5em; - padding: 0.5em; +div.figure, +figure { + margin: 0.5em; + padding: 0.5em; } -div.figure p.caption, figcaption { - padding: 0.3em; +div.figure p.caption, +figcaption { + padding: 0.3em; } div.figure p.caption span.caption-number, figcaption span.caption-number { - font-style: italic; + font-style: italic; } div.figure p.caption span.caption-text, @@ -472,349 +488,365 @@ figcaption span.caption-text { /* -- field list styles ----------------------------------------------------- */ -table.field-list td, table.field-list th { - border: 0 !important; +table.field-list td, +table.field-list th { + border: 0 !important; } .field-list ul { - margin: 
0; - padding-left: 1em; + margin: 0; + padding-left: 1em; } .field-list p { - margin: 0; + margin: 0; } .field-name { - -moz-hyphens: manual; - -ms-hyphens: manual; - -webkit-hyphens: manual; - hyphens: manual; + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; } /* -- hlist styles ---------------------------------------------------------- */ table.hlist { - margin: 1em 0; + margin: 1em 0; } table.hlist td { - vertical-align: top; + vertical-align: top; } /* -- object description styles --------------------------------------------- */ .sig { - font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-family: "Consolas", "Menlo", "DejaVu Sans Mono", + "Bitstream Vera Sans Mono", monospace; } -.sig-name, code.descname { - background-color: transparent; - font-weight: bold; +.sig-name, +code.descname { + background-color: transparent; + font-weight: bold; } .sig-name { - font-size: 1.1em; + font-size: 1.1em; } code.descname { - font-size: 1.2em; + font-size: 1.2em; } -.sig-prename, code.descclassname { - background-color: transparent; +.sig-prename, +code.descclassname { + background-color: transparent; } .optional { - font-size: 1.3em; + font-size: 1.3em; } .sig-paren { - font-size: larger; + font-size: larger; } .sig-param.n { - font-style: italic; + font-style: italic; } /* C++ specific styling */ .sig-inline.c-texpr, .sig-inline.cpp-texpr { - font-family: unset; + font-family: unset; } -.sig.c .k, .sig.c .kt, -.sig.cpp .k, .sig.cpp .kt { - color: #0033B3; +.sig.c .k, +.sig.c .kt, +.sig.cpp .k, +.sig.cpp .kt { + color: #0033b3; } -.sig.c .m, +.sig.c .m, .sig.cpp .m { - color: #1750EB; + color: #1750eb; } -.sig.c .s, .sig.c .sc, -.sig.cpp .s, .sig.cpp .sc { - color: #067D17; +.sig.c .s, +.sig.c .sc, +.sig.cpp .s, +.sig.cpp .sc { + color: #067d17; } - /* -- other body styles ----------------------------------------------------- */ ol.arabic { - list-style: decimal; + list-style: 
decimal; } ol.loweralpha { - list-style: lower-alpha; + list-style: lower-alpha; } ol.upperalpha { - list-style: upper-alpha; + list-style: upper-alpha; } ol.lowerroman { - list-style: lower-roman; + list-style: lower-roman; } ol.upperroman { - list-style: upper-roman; + list-style: upper-roman; } :not(li) > ol > li:first-child > :first-child, :not(li) > ul > li:first-child > :first-child { - margin-top: 0px; + margin-top: 0px; } :not(li) > ol > li:last-child > :last-child, :not(li) > ul > li:last-child > :last-child { - margin-bottom: 0px; + margin-bottom: 0px; } ol.simple ol p, ol.simple ul p, ul.simple ol p, ul.simple ul p { - margin-top: 0; + margin-top: 0; } ol.simple > li:not(:first-child) > p, ul.simple > li:not(:first-child) > p { - margin-top: 0; + margin-top: 0; } ol.simple p, ul.simple p { - margin-bottom: 0; + margin-bottom: 0; } -dl.footnote > dt, -dl.citation > dt { - float: left; - margin-right: 0.5em; +aside.footnote > span, +div.citation > span { + float: left; } - -dl.footnote > dd, -dl.citation > dd { - margin-bottom: 0em; +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; } - -dl.footnote > dd:after, -dl.citation > dd:after { - content: ""; - clear: both; +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; } dl.field-list { - display: grid; - grid-template-columns: fit-content(30%) auto; + display: grid; + grid-template-columns: fit-content(30%) auto; } dl.field-list > dt { - font-weight: bold; - word-break: break-word; - padding-left: 0.5em; - padding-right: 5px; -} -dl.field-list > dt:after { - content: ":"; + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; } - dl.field-list > dd { - padding-left: 0.5em; - margin-top: 0em; - 
margin-left: 0em; - margin-bottom: 0em; + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; } dl { - margin-bottom: 15px; + margin-bottom: 15px; } dd > :first-child { - margin-top: 0px; + margin-top: 0px; } -dd ul, dd table { - margin-bottom: 10px; +dd ul, +dd table { + margin-bottom: 10px; } dd { - margin-top: 3px; - margin-bottom: 10px; - margin-left: 30px; + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; } dl > dd:last-child, dl > dd:last-child > :last-child { - margin-bottom: 0; + margin-bottom: 0; } -dt:target, span.highlighted { - background-color: #fbe54e; +dt:target, +span.highlighted { + background-color: #fbe54e; } rect.highlighted { - fill: #fbe54e; + fill: #fbe54e; } dl.glossary dt { - font-weight: bold; - font-size: 1.1em; + font-weight: bold; + font-size: 1.1em; } .versionmodified { - font-style: italic; + font-style: italic; } .system-message { - background-color: #fda; - padding: 5px; - border: 3px solid red; + background-color: #fda; + padding: 5px; + border: 3px solid red; } -.footnote:target { - background-color: #ffa; +.footnote:target { + background-color: #ffa; } .line-block { - display: block; - margin-top: 1em; - margin-bottom: 1em; + display: block; + margin-top: 1em; + margin-bottom: 1em; } .line-block .line-block { - margin-top: 0; - margin-bottom: 0; - margin-left: 1.5em; + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; } -.guilabel, .menuselection { - font-family: sans-serif; +.guilabel, +.menuselection { + font-family: sans-serif; } .accelerator { - text-decoration: underline; + text-decoration: underline; } .classifier { - font-style: oblique; + font-style: oblique; } .classifier:before { - font-style: normal; - margin: 0 0.5em; - content: ":"; - display: inline-block; + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; } -abbr, acronym { - border-bottom: dotted 1px; - cursor: help; +abbr, +acronym { + border-bottom: dotted 1px; + cursor: help; } /* 
-- code displays --------------------------------------------------------- */ pre { - overflow: auto; - overflow-y: hidden; /* fixes display issues on Chrome browsers */ + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ } -pre, div[class*="highlight-"] { - clear: both; +pre, +div[class*="highlight-"] { + clear: both; } span.pre { - -moz-hyphens: none; - -ms-hyphens: none; - -webkit-hyphens: none; - hyphens: none; - white-space: nowrap; + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; } div[class*="highlight-"] { - margin: 1em 0; + margin: 1em 0; } td.linenos pre { - border: 0; - background-color: transparent; - color: #aaa; + border: 0; + background-color: transparent; + color: #aaa; } table.highlighttable { - display: block; + display: block; } table.highlighttable tbody { - display: block; + display: block; } table.highlighttable tr { - display: flex; + display: flex; } table.highlighttable td { - margin: 0; - padding: 0; + margin: 0; + padding: 0; } table.highlighttable td.linenos { - padding-right: 0.5em; + padding-right: 0.5em; } table.highlighttable td.code { - flex: 1; - overflow: hidden; + flex: 1; + overflow: hidden; } .highlight .hll { - display: block; + display: block; } div.highlight pre, table.highlighttable pre { - margin: 0; + margin: 0; } div.code-block-caption + div { - margin-top: 0; + margin-top: 0; } div.code-block-caption { - margin-top: 1em; - padding: 2px 5px; - font-size: small; + margin-top: 1em; + padding: 2px 5px; + font-size: small; } div.code-block-caption code { - background-color: transparent; + background-color: transparent; } table.highlighttable td.linenos, span.linenos, -div.highlight span.gp { /* gp: Generic.Prompt */ +div.highlight span.gp { + /* gp: Generic.Prompt */ user-select: none; -webkit-user-select: text; /* Safari fallback only */ -webkit-user-select: none; /* Chrome/Safari */ @@ -823,77 +855,83 @@ div.highlight span.gp { /* 
gp: Generic.Prompt */ } div.code-block-caption span.caption-number { - padding: 0.1em 0.3em; - font-style: italic; + padding: 0.1em 0.3em; + font-style: italic; } div.code-block-caption span.caption-text { } div.literal-block-wrapper { - margin: 1em 0; + margin: 1em 0; } -code.xref, a code { - background-color: transparent; - font-weight: bold; +code.xref, +a code { + background-color: transparent; + font-weight: bold; } -h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { - background-color: transparent; +h1 code, +h2 code, +h3 code, +h4 code, +h5 code, +h6 code { + background-color: transparent; } .viewcode-link { - float: right; + float: right; } .viewcode-back { - float: right; - font-family: sans-serif; + float: right; + font-family: sans-serif; } div.viewcode-block:target { - margin: -1px -10px; - padding: 0 10px; + margin: -1px -10px; + padding: 0 10px; } /* -- math display ---------------------------------------------------------- */ img.math { - vertical-align: middle; + vertical-align: middle; } div.body div.math p { - text-align: center; + text-align: center; } span.eqno { - float: right; + float: right; } span.eqno a.headerlink { - position: absolute; - z-index: 1; + position: absolute; + z-index: 1; } div.math:hover a.headerlink { - visibility: visible; + visibility: visible; } /* -- printout stylesheet --------------------------------------------------- */ @media print { - div.document, - div.documentwrapper, - div.bodywrapper { - margin: 0 !important; - width: 100%; - } - - div.sphinxsidebar, - div.related, - div.footer, - #top-link { - display: none; - } -} \ No newline at end of file + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} diff --git a/docs/build/html/_static/pygments.css b/docs/build/html/_static/pygments.css index 75471509..e7e91a11 100644 --- a/docs/build/html/_static/pygments.css +++ 
b/docs/build/html/_static/pygments.css @@ -1,255 +1,892 @@ -.highlight pre { line-height: 125%; } -.highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } -.highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } -.highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -.highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -.highlight .hll { background-color: #ffffcc } -.highlight { background: #f8f8f8; } -.highlight .c { color: #8f5902; font-style: italic } /* Comment */ -.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ -.highlight .g { color: #000000 } /* Generic */ -.highlight .k { color: #204a87; font-weight: bold } /* Keyword */ -.highlight .l { color: #000000 } /* Literal */ -.highlight .n { color: #000000 } /* Name */ -.highlight .o { color: #ce5c00; font-weight: bold } /* Operator */ -.highlight .x { color: #000000 } /* Other */ -.highlight .p { color: #000000; font-weight: bold } /* Punctuation */ -.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ -.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ -.highlight .cp { color: #8f5902; font-style: italic } /* Comment.Preproc */ -.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ -.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ -.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ -.highlight .gd { color: #a40000 } /* Generic.Deleted */ -.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ -.highlight .gr { color: #ef2929 } /* Generic.Error */ -.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ -.highlight .gi { color: #00A000 } /* Generic.Inserted */ -.highlight .go { 
color: #000000; font-style: italic } /* Generic.Output */ -.highlight .gp { color: #8f5902 } /* Generic.Prompt */ -.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ -.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ -.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ -.highlight .kc { color: #204a87; font-weight: bold } /* Keyword.Constant */ -.highlight .kd { color: #204a87; font-weight: bold } /* Keyword.Declaration */ -.highlight .kn { color: #204a87; font-weight: bold } /* Keyword.Namespace */ -.highlight .kp { color: #204a87; font-weight: bold } /* Keyword.Pseudo */ -.highlight .kr { color: #204a87; font-weight: bold } /* Keyword.Reserved */ -.highlight .kt { color: #204a87; font-weight: bold } /* Keyword.Type */ -.highlight .ld { color: #000000 } /* Literal.Date */ -.highlight .m { color: #0000cf; font-weight: bold } /* Literal.Number */ -.highlight .s { color: #4e9a06 } /* Literal.String */ -.highlight .na { color: #c4a000 } /* Name.Attribute */ -.highlight .nb { color: #204a87 } /* Name.Builtin */ -.highlight .nc { color: #000000 } /* Name.Class */ -.highlight .no { color: #000000 } /* Name.Constant */ -.highlight .nd { color: #5c35cc; font-weight: bold } /* Name.Decorator */ -.highlight .ni { color: #ce5c00 } /* Name.Entity */ -.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ -.highlight .nf { color: #000000 } /* Name.Function */ -.highlight .nl { color: #f57900 } /* Name.Label */ -.highlight .nn { color: #000000 } /* Name.Namespace */ -.highlight .nx { color: #000000 } /* Name.Other */ -.highlight .py { color: #000000 } /* Name.Property */ -.highlight .nt { color: #204a87; font-weight: bold } /* Name.Tag */ -.highlight .nv { color: #000000 } /* Name.Variable */ -.highlight .ow { color: #204a87; font-weight: bold } /* Operator.Word */ -.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */ -.highlight .w { color: #f8f8f8 } /* 
Text.Whitespace */ -.highlight .mb { color: #0000cf; font-weight: bold } /* Literal.Number.Bin */ -.highlight .mf { color: #0000cf; font-weight: bold } /* Literal.Number.Float */ -.highlight .mh { color: #0000cf; font-weight: bold } /* Literal.Number.Hex */ -.highlight .mi { color: #0000cf; font-weight: bold } /* Literal.Number.Integer */ -.highlight .mo { color: #0000cf; font-weight: bold } /* Literal.Number.Oct */ -.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ -.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ -.highlight .sc { color: #4e9a06 } /* Literal.String.Char */ -.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ -.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ -.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ -.highlight .se { color: #4e9a06 } /* Literal.String.Escape */ -.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ -.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ -.highlight .sx { color: #4e9a06 } /* Literal.String.Other */ -.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ -.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ -.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ -.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ -.highlight .fm { color: #000000 } /* Name.Function.Magic */ -.highlight .vc { color: #000000 } /* Name.Variable.Class */ -.highlight .vg { color: #000000 } /* Name.Variable.Global */ -.highlight .vi { color: #000000 } /* Name.Variable.Instance */ -.highlight .vm { color: #000000 } /* Name.Variable.Magic */ -.highlight .il { color: #0000cf; font-weight: bold } /* Literal.Number.Integer.Long */ +.highlight pre { + line-height: 125%; +} +.highlight td.linenos .normal { + color: inherit; + background-color: transparent; + padding-left: 5px; + padding-right: 5px; +} +.highlight span.linenos { + color: inherit; + background-color: transparent; + padding-left: 5px; + 
padding-right: 5px; +} +.highlight td.linenos .special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; +} +.highlight span.linenos.special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; +} +.highlight .hll { + background-color: #ffffcc; +} +.highlight { + background: #f8f8f8; +} +.highlight .c { + color: #8f5902; + font-style: italic; +} /* Comment */ +.highlight .err { + color: #a40000; + border: 1px solid #ef2929; +} /* Error */ +.highlight .g { + color: #000000; +} /* Generic */ +.highlight .k { + color: #204a87; + font-weight: bold; +} /* Keyword */ +.highlight .l { + color: #000000; +} /* Literal */ +.highlight .n { + color: #000000; +} /* Name */ +.highlight .o { + color: #ce5c00; + font-weight: bold; +} /* Operator */ +.highlight .x { + color: #000000; +} /* Other */ +.highlight .p { + color: #000000; + font-weight: bold; +} /* Punctuation */ +.highlight .ch { + color: #8f5902; + font-style: italic; +} /* Comment.Hashbang */ +.highlight .cm { + color: #8f5902; + font-style: italic; +} /* Comment.Multiline */ +.highlight .cp { + color: #8f5902; + font-style: italic; +} /* Comment.Preproc */ +.highlight .cpf { + color: #8f5902; + font-style: italic; +} /* Comment.PreprocFile */ +.highlight .c1 { + color: #8f5902; + font-style: italic; +} /* Comment.Single */ +.highlight .cs { + color: #8f5902; + font-style: italic; +} /* Comment.Special */ +.highlight .gd { + color: #a40000; +} /* Generic.Deleted */ +.highlight .ge { + color: #000000; + font-style: italic; +} /* Generic.Emph */ +.highlight .ges { + color: #000000; + font-weight: bold; + font-style: italic; +} /* Generic.EmphStrong */ +.highlight .gr { + color: #ef2929; +} /* Generic.Error */ +.highlight .gh { + color: #000080; + font-weight: bold; +} /* Generic.Heading */ +.highlight .gi { + color: #00a000; +} /* Generic.Inserted */ +.highlight .go { + color: #000000; + font-style: italic; +} /* Generic.Output */ 
+.highlight .gp { + color: #8f5902; +} /* Generic.Prompt */ +.highlight .gs { + color: #000000; + font-weight: bold; +} /* Generic.Strong */ +.highlight .gu { + color: #800080; + font-weight: bold; +} /* Generic.Subheading */ +.highlight .gt { + color: #a40000; + font-weight: bold; +} /* Generic.Traceback */ +.highlight .kc { + color: #204a87; + font-weight: bold; +} /* Keyword.Constant */ +.highlight .kd { + color: #204a87; + font-weight: bold; +} /* Keyword.Declaration */ +.highlight .kn { + color: #204a87; + font-weight: bold; +} /* Keyword.Namespace */ +.highlight .kp { + color: #204a87; + font-weight: bold; +} /* Keyword.Pseudo */ +.highlight .kr { + color: #204a87; + font-weight: bold; +} /* Keyword.Reserved */ +.highlight .kt { + color: #204a87; + font-weight: bold; +} /* Keyword.Type */ +.highlight .ld { + color: #000000; +} /* Literal.Date */ +.highlight .m { + color: #0000cf; + font-weight: bold; +} /* Literal.Number */ +.highlight .s { + color: #4e9a06; +} /* Literal.String */ +.highlight .na { + color: #c4a000; +} /* Name.Attribute */ +.highlight .nb { + color: #204a87; +} /* Name.Builtin */ +.highlight .nc { + color: #000000; +} /* Name.Class */ +.highlight .no { + color: #000000; +} /* Name.Constant */ +.highlight .nd { + color: #5c35cc; + font-weight: bold; +} /* Name.Decorator */ +.highlight .ni { + color: #ce5c00; +} /* Name.Entity */ +.highlight .ne { + color: #cc0000; + font-weight: bold; +} /* Name.Exception */ +.highlight .nf { + color: #000000; +} /* Name.Function */ +.highlight .nl { + color: #f57900; +} /* Name.Label */ +.highlight .nn { + color: #000000; +} /* Name.Namespace */ +.highlight .nx { + color: #000000; +} /* Name.Other */ +.highlight .py { + color: #000000; +} /* Name.Property */ +.highlight .nt { + color: #204a87; + font-weight: bold; +} /* Name.Tag */ +.highlight .nv { + color: #000000; +} /* Name.Variable */ +.highlight .ow { + color: #204a87; + font-weight: bold; +} /* Operator.Word */ +.highlight .pm { + color: #000000; + 
font-weight: bold; +} /* Punctuation.Marker */ +.highlight .w { + color: #f8f8f8; +} /* Text.Whitespace */ +.highlight .mb { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Bin */ +.highlight .mf { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Float */ +.highlight .mh { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Hex */ +.highlight .mi { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Integer */ +.highlight .mo { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Oct */ +.highlight .sa { + color: #4e9a06; +} /* Literal.String.Affix */ +.highlight .sb { + color: #4e9a06; +} /* Literal.String.Backtick */ +.highlight .sc { + color: #4e9a06; +} /* Literal.String.Char */ +.highlight .dl { + color: #4e9a06; +} /* Literal.String.Delimiter */ +.highlight .sd { + color: #8f5902; + font-style: italic; +} /* Literal.String.Doc */ +.highlight .s2 { + color: #4e9a06; +} /* Literal.String.Double */ +.highlight .se { + color: #4e9a06; +} /* Literal.String.Escape */ +.highlight .sh { + color: #4e9a06; +} /* Literal.String.Heredoc */ +.highlight .si { + color: #4e9a06; +} /* Literal.String.Interpol */ +.highlight .sx { + color: #4e9a06; +} /* Literal.String.Other */ +.highlight .sr { + color: #4e9a06; +} /* Literal.String.Regex */ +.highlight .s1 { + color: #4e9a06; +} /* Literal.String.Single */ +.highlight .ss { + color: #4e9a06; +} /* Literal.String.Symbol */ +.highlight .bp { + color: #3465a4; +} /* Name.Builtin.Pseudo */ +.highlight .fm { + color: #000000; +} /* Name.Function.Magic */ +.highlight .vc { + color: #000000; +} /* Name.Variable.Class */ +.highlight .vg { + color: #000000; +} /* Name.Variable.Global */ +.highlight .vi { + color: #000000; +} /* Name.Variable.Instance */ +.highlight .vm { + color: #000000; +} /* Name.Variable.Magic */ +.highlight .il { + color: #0000cf; + font-weight: bold; +} /* Literal.Number.Integer.Long */ @media not print { -body[data-theme="dark"] .highlight pre { line-height: 125%; 
} -body[data-theme="dark"] .highlight td.linenos .normal { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; } -body[data-theme="dark"] .highlight span.linenos { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; } -body[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -body[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -body[data-theme="dark"] .highlight .hll { background-color: #404040 } -body[data-theme="dark"] .highlight { background: #202020; color: #d0d0d0 } -body[data-theme="dark"] .highlight .c { color: #ababab; font-style: italic } /* Comment */ -body[data-theme="dark"] .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ -body[data-theme="dark"] .highlight .esc { color: #d0d0d0 } /* Escape */ -body[data-theme="dark"] .highlight .g { color: #d0d0d0 } /* Generic */ -body[data-theme="dark"] .highlight .k { color: #6ebf26; font-weight: bold } /* Keyword */ -body[data-theme="dark"] .highlight .l { color: #d0d0d0 } /* Literal */ -body[data-theme="dark"] .highlight .n { color: #d0d0d0 } /* Name */ -body[data-theme="dark"] .highlight .o { color: #d0d0d0 } /* Operator */ -body[data-theme="dark"] .highlight .x { color: #d0d0d0 } /* Other */ -body[data-theme="dark"] .highlight .p { color: #d0d0d0 } /* Punctuation */ -body[data-theme="dark"] .highlight .ch { color: #ababab; font-style: italic } /* Comment.Hashbang */ -body[data-theme="dark"] .highlight .cm { color: #ababab; font-style: italic } /* Comment.Multiline */ -body[data-theme="dark"] .highlight .cp { color: #cd2828; font-weight: bold } /* Comment.Preproc */ -body[data-theme="dark"] .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */ -body[data-theme="dark"] .highlight .c1 { color: #ababab; font-style: italic } /* 
Comment.Single */ -body[data-theme="dark"] .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */ -body[data-theme="dark"] .highlight .gd { color: #d22323 } /* Generic.Deleted */ -body[data-theme="dark"] .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */ -body[data-theme="dark"] .highlight .gr { color: #d22323 } /* Generic.Error */ -body[data-theme="dark"] .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */ -body[data-theme="dark"] .highlight .gi { color: #589819 } /* Generic.Inserted */ -body[data-theme="dark"] .highlight .go { color: #cccccc } /* Generic.Output */ -body[data-theme="dark"] .highlight .gp { color: #aaaaaa } /* Generic.Prompt */ -body[data-theme="dark"] .highlight .gs { color: #d0d0d0; font-weight: bold } /* Generic.Strong */ -body[data-theme="dark"] .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */ -body[data-theme="dark"] .highlight .gt { color: #d22323 } /* Generic.Traceback */ -body[data-theme="dark"] .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */ -body[data-theme="dark"] .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */ -body[data-theme="dark"] .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */ -body[data-theme="dark"] .highlight .kp { color: #6ebf26 } /* Keyword.Pseudo */ -body[data-theme="dark"] .highlight .kr { color: #6ebf26; font-weight: bold } /* Keyword.Reserved */ -body[data-theme="dark"] .highlight .kt { color: #6ebf26; font-weight: bold } /* Keyword.Type */ -body[data-theme="dark"] .highlight .ld { color: #d0d0d0 } /* Literal.Date */ -body[data-theme="dark"] .highlight .m { color: #51b2fd } /* Literal.Number */ -body[data-theme="dark"] .highlight .s { color: #ed9d13 } /* Literal.String */ -body[data-theme="dark"] .highlight .na { color: #bbbbbb } /* Name.Attribute */ -body[data-theme="dark"] .highlight .nb { color: #2fbccd } 
/* Name.Builtin */ -body[data-theme="dark"] .highlight .nc { color: #71adff; text-decoration: underline } /* Name.Class */ -body[data-theme="dark"] .highlight .no { color: #40ffff } /* Name.Constant */ -body[data-theme="dark"] .highlight .nd { color: #ffa500 } /* Name.Decorator */ -body[data-theme="dark"] .highlight .ni { color: #d0d0d0 } /* Name.Entity */ -body[data-theme="dark"] .highlight .ne { color: #bbbbbb } /* Name.Exception */ -body[data-theme="dark"] .highlight .nf { color: #71adff } /* Name.Function */ -body[data-theme="dark"] .highlight .nl { color: #d0d0d0 } /* Name.Label */ -body[data-theme="dark"] .highlight .nn { color: #71adff; text-decoration: underline } /* Name.Namespace */ -body[data-theme="dark"] .highlight .nx { color: #d0d0d0 } /* Name.Other */ -body[data-theme="dark"] .highlight .py { color: #d0d0d0 } /* Name.Property */ -body[data-theme="dark"] .highlight .nt { color: #6ebf26; font-weight: bold } /* Name.Tag */ -body[data-theme="dark"] .highlight .nv { color: #40ffff } /* Name.Variable */ -body[data-theme="dark"] .highlight .ow { color: #6ebf26; font-weight: bold } /* Operator.Word */ -body[data-theme="dark"] .highlight .pm { color: #d0d0d0 } /* Punctuation.Marker */ -body[data-theme="dark"] .highlight .w { color: #666666 } /* Text.Whitespace */ -body[data-theme="dark"] .highlight .mb { color: #51b2fd } /* Literal.Number.Bin */ -body[data-theme="dark"] .highlight .mf { color: #51b2fd } /* Literal.Number.Float */ -body[data-theme="dark"] .highlight .mh { color: #51b2fd } /* Literal.Number.Hex */ -body[data-theme="dark"] .highlight .mi { color: #51b2fd } /* Literal.Number.Integer */ -body[data-theme="dark"] .highlight .mo { color: #51b2fd } /* Literal.Number.Oct */ -body[data-theme="dark"] .highlight .sa { color: #ed9d13 } /* Literal.String.Affix */ -body[data-theme="dark"] .highlight .sb { color: #ed9d13 } /* Literal.String.Backtick */ -body[data-theme="dark"] .highlight .sc { color: #ed9d13 } /* Literal.String.Char */ 
-body[data-theme="dark"] .highlight .dl { color: #ed9d13 } /* Literal.String.Delimiter */ -body[data-theme="dark"] .highlight .sd { color: #ed9d13 } /* Literal.String.Doc */ -body[data-theme="dark"] .highlight .s2 { color: #ed9d13 } /* Literal.String.Double */ -body[data-theme="dark"] .highlight .se { color: #ed9d13 } /* Literal.String.Escape */ -body[data-theme="dark"] .highlight .sh { color: #ed9d13 } /* Literal.String.Heredoc */ -body[data-theme="dark"] .highlight .si { color: #ed9d13 } /* Literal.String.Interpol */ -body[data-theme="dark"] .highlight .sx { color: #ffa500 } /* Literal.String.Other */ -body[data-theme="dark"] .highlight .sr { color: #ed9d13 } /* Literal.String.Regex */ -body[data-theme="dark"] .highlight .s1 { color: #ed9d13 } /* Literal.String.Single */ -body[data-theme="dark"] .highlight .ss { color: #ed9d13 } /* Literal.String.Symbol */ -body[data-theme="dark"] .highlight .bp { color: #2fbccd } /* Name.Builtin.Pseudo */ -body[data-theme="dark"] .highlight .fm { color: #71adff } /* Name.Function.Magic */ -body[data-theme="dark"] .highlight .vc { color: #40ffff } /* Name.Variable.Class */ -body[data-theme="dark"] .highlight .vg { color: #40ffff } /* Name.Variable.Global */ -body[data-theme="dark"] .highlight .vi { color: #40ffff } /* Name.Variable.Instance */ -body[data-theme="dark"] .highlight .vm { color: #40ffff } /* Name.Variable.Magic */ -body[data-theme="dark"] .highlight .il { color: #51b2fd } /* Literal.Number.Integer.Long */ -@media (prefers-color-scheme: dark) { -body:not([data-theme="light"]) .highlight pre { line-height: 125%; } -body:not([data-theme="light"]) .highlight td.linenos .normal { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; } -body:not([data-theme="light"]) .highlight span.linenos { color: #aaaaaa; background-color: transparent; padding-left: 5px; padding-right: 5px; } -body:not([data-theme="light"]) .highlight td.linenos .special { color: #000000; background-color: #ffffc0; 
padding-left: 5px; padding-right: 5px; } -body:not([data-theme="light"]) .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } -body:not([data-theme="light"]) .highlight .hll { background-color: #404040 } -body:not([data-theme="light"]) .highlight { background: #202020; color: #d0d0d0 } -body:not([data-theme="light"]) .highlight .c { color: #ababab; font-style: italic } /* Comment */ -body:not([data-theme="light"]) .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ -body:not([data-theme="light"]) .highlight .esc { color: #d0d0d0 } /* Escape */ -body:not([data-theme="light"]) .highlight .g { color: #d0d0d0 } /* Generic */ -body:not([data-theme="light"]) .highlight .k { color: #6ebf26; font-weight: bold } /* Keyword */ -body:not([data-theme="light"]) .highlight .l { color: #d0d0d0 } /* Literal */ -body:not([data-theme="light"]) .highlight .n { color: #d0d0d0 } /* Name */ -body:not([data-theme="light"]) .highlight .o { color: #d0d0d0 } /* Operator */ -body:not([data-theme="light"]) .highlight .x { color: #d0d0d0 } /* Other */ -body:not([data-theme="light"]) .highlight .p { color: #d0d0d0 } /* Punctuation */ -body:not([data-theme="light"]) .highlight .ch { color: #ababab; font-style: italic } /* Comment.Hashbang */ -body:not([data-theme="light"]) .highlight .cm { color: #ababab; font-style: italic } /* Comment.Multiline */ -body:not([data-theme="light"]) .highlight .cp { color: #cd2828; font-weight: bold } /* Comment.Preproc */ -body:not([data-theme="light"]) .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */ -body:not([data-theme="light"]) .highlight .c1 { color: #ababab; font-style: italic } /* Comment.Single */ -body:not([data-theme="light"]) .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */ -body:not([data-theme="light"]) .highlight .gd { color: #d22323 } /* Generic.Deleted */ 
-body:not([data-theme="light"]) .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */ -body:not([data-theme="light"]) .highlight .gr { color: #d22323 } /* Generic.Error */ -body:not([data-theme="light"]) .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */ -body:not([data-theme="light"]) .highlight .gi { color: #589819 } /* Generic.Inserted */ -body:not([data-theme="light"]) .highlight .go { color: #cccccc } /* Generic.Output */ -body:not([data-theme="light"]) .highlight .gp { color: #aaaaaa } /* Generic.Prompt */ -body:not([data-theme="light"]) .highlight .gs { color: #d0d0d0; font-weight: bold } /* Generic.Strong */ -body:not([data-theme="light"]) .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */ -body:not([data-theme="light"]) .highlight .gt { color: #d22323 } /* Generic.Traceback */ -body:not([data-theme="light"]) .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */ -body:not([data-theme="light"]) .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */ -body:not([data-theme="light"]) .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */ -body:not([data-theme="light"]) .highlight .kp { color: #6ebf26 } /* Keyword.Pseudo */ -body:not([data-theme="light"]) .highlight .kr { color: #6ebf26; font-weight: bold } /* Keyword.Reserved */ -body:not([data-theme="light"]) .highlight .kt { color: #6ebf26; font-weight: bold } /* Keyword.Type */ -body:not([data-theme="light"]) .highlight .ld { color: #d0d0d0 } /* Literal.Date */ -body:not([data-theme="light"]) .highlight .m { color: #51b2fd } /* Literal.Number */ -body:not([data-theme="light"]) .highlight .s { color: #ed9d13 } /* Literal.String */ -body:not([data-theme="light"]) .highlight .na { color: #bbbbbb } /* Name.Attribute */ -body:not([data-theme="light"]) .highlight .nb { color: #2fbccd } /* Name.Builtin */ -body:not([data-theme="light"]) .highlight .nc { color: #71adff; 
text-decoration: underline } /* Name.Class */ -body:not([data-theme="light"]) .highlight .no { color: #40ffff } /* Name.Constant */ -body:not([data-theme="light"]) .highlight .nd { color: #ffa500 } /* Name.Decorator */ -body:not([data-theme="light"]) .highlight .ni { color: #d0d0d0 } /* Name.Entity */ -body:not([data-theme="light"]) .highlight .ne { color: #bbbbbb } /* Name.Exception */ -body:not([data-theme="light"]) .highlight .nf { color: #71adff } /* Name.Function */ -body:not([data-theme="light"]) .highlight .nl { color: #d0d0d0 } /* Name.Label */ -body:not([data-theme="light"]) .highlight .nn { color: #71adff; text-decoration: underline } /* Name.Namespace */ -body:not([data-theme="light"]) .highlight .nx { color: #d0d0d0 } /* Name.Other */ -body:not([data-theme="light"]) .highlight .py { color: #d0d0d0 } /* Name.Property */ -body:not([data-theme="light"]) .highlight .nt { color: #6ebf26; font-weight: bold } /* Name.Tag */ -body:not([data-theme="light"]) .highlight .nv { color: #40ffff } /* Name.Variable */ -body:not([data-theme="light"]) .highlight .ow { color: #6ebf26; font-weight: bold } /* Operator.Word */ -body:not([data-theme="light"]) .highlight .pm { color: #d0d0d0 } /* Punctuation.Marker */ -body:not([data-theme="light"]) .highlight .w { color: #666666 } /* Text.Whitespace */ -body:not([data-theme="light"]) .highlight .mb { color: #51b2fd } /* Literal.Number.Bin */ -body:not([data-theme="light"]) .highlight .mf { color: #51b2fd } /* Literal.Number.Float */ -body:not([data-theme="light"]) .highlight .mh { color: #51b2fd } /* Literal.Number.Hex */ -body:not([data-theme="light"]) .highlight .mi { color: #51b2fd } /* Literal.Number.Integer */ -body:not([data-theme="light"]) .highlight .mo { color: #51b2fd } /* Literal.Number.Oct */ -body:not([data-theme="light"]) .highlight .sa { color: #ed9d13 } /* Literal.String.Affix */ -body:not([data-theme="light"]) .highlight .sb { color: #ed9d13 } /* Literal.String.Backtick */ -body:not([data-theme="light"]) 
.highlight .sc { color: #ed9d13 } /* Literal.String.Char */ -body:not([data-theme="light"]) .highlight .dl { color: #ed9d13 } /* Literal.String.Delimiter */ -body:not([data-theme="light"]) .highlight .sd { color: #ed9d13 } /* Literal.String.Doc */ -body:not([data-theme="light"]) .highlight .s2 { color: #ed9d13 } /* Literal.String.Double */ -body:not([data-theme="light"]) .highlight .se { color: #ed9d13 } /* Literal.String.Escape */ -body:not([data-theme="light"]) .highlight .sh { color: #ed9d13 } /* Literal.String.Heredoc */ -body:not([data-theme="light"]) .highlight .si { color: #ed9d13 } /* Literal.String.Interpol */ -body:not([data-theme="light"]) .highlight .sx { color: #ffa500 } /* Literal.String.Other */ -body:not([data-theme="light"]) .highlight .sr { color: #ed9d13 } /* Literal.String.Regex */ -body:not([data-theme="light"]) .highlight .s1 { color: #ed9d13 } /* Literal.String.Single */ -body:not([data-theme="light"]) .highlight .ss { color: #ed9d13 } /* Literal.String.Symbol */ -body:not([data-theme="light"]) .highlight .bp { color: #2fbccd } /* Name.Builtin.Pseudo */ -body:not([data-theme="light"]) .highlight .fm { color: #71adff } /* Name.Function.Magic */ -body:not([data-theme="light"]) .highlight .vc { color: #40ffff } /* Name.Variable.Class */ -body:not([data-theme="light"]) .highlight .vg { color: #40ffff } /* Name.Variable.Global */ -body:not([data-theme="light"]) .highlight .vi { color: #40ffff } /* Name.Variable.Instance */ -body:not([data-theme="light"]) .highlight .vm { color: #40ffff } /* Name.Variable.Magic */ -body:not([data-theme="light"]) .highlight .il { color: #51b2fd } /* Literal.Number.Integer.Long */ + body[data-theme="dark"] .highlight pre { + line-height: 125%; + } + body[data-theme="dark"] .highlight td.linenos .normal { + color: #aaaaaa; + background-color: transparent; + padding-left: 5px; + padding-right: 5px; + } + body[data-theme="dark"] .highlight span.linenos { + color: #aaaaaa; + background-color: transparent; + padding-left: 
5px; + padding-right: 5px; + } + body[data-theme="dark"] .highlight td.linenos .special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; + } + body[data-theme="dark"] .highlight span.linenos.special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; + } + body[data-theme="dark"] .highlight .hll { + background-color: #404040; + } + body[data-theme="dark"] .highlight { + background: #202020; + color: #d0d0d0; + } + body[data-theme="dark"] .highlight .c { + color: #ababab; + font-style: italic; + } /* Comment */ + body[data-theme="dark"] .highlight .err { + color: #a61717; + background-color: #e3d2d2; + } /* Error */ + body[data-theme="dark"] .highlight .esc { + color: #d0d0d0; + } /* Escape */ + body[data-theme="dark"] .highlight .g { + color: #d0d0d0; + } /* Generic */ + body[data-theme="dark"] .highlight .k { + color: #6ebf26; + font-weight: bold; + } /* Keyword */ + body[data-theme="dark"] .highlight .l { + color: #d0d0d0; + } /* Literal */ + body[data-theme="dark"] .highlight .n { + color: #d0d0d0; + } /* Name */ + body[data-theme="dark"] .highlight .o { + color: #d0d0d0; + } /* Operator */ + body[data-theme="dark"] .highlight .x { + color: #d0d0d0; + } /* Other */ + body[data-theme="dark"] .highlight .p { + color: #d0d0d0; + } /* Punctuation */ + body[data-theme="dark"] .highlight .ch { + color: #ababab; + font-style: italic; + } /* Comment.Hashbang */ + body[data-theme="dark"] .highlight .cm { + color: #ababab; + font-style: italic; + } /* Comment.Multiline */ + body[data-theme="dark"] .highlight .cp { + color: #ff3a3a; + font-weight: bold; + } /* Comment.Preproc */ + body[data-theme="dark"] .highlight .cpf { + color: #ababab; + font-style: italic; + } /* Comment.PreprocFile */ + body[data-theme="dark"] .highlight .c1 { + color: #ababab; + font-style: italic; + } /* Comment.Single */ + body[data-theme="dark"] .highlight .cs { + color: #e50808; + font-weight: bold; + 
background-color: #520000; + } /* Comment.Special */ + body[data-theme="dark"] .highlight .gd { + color: #d22323; + } /* Generic.Deleted */ + body[data-theme="dark"] .highlight .ge { + color: #d0d0d0; + font-style: italic; + } /* Generic.Emph */ + body[data-theme="dark"] .highlight .ges { + color: #d0d0d0; + font-weight: bold; + font-style: italic; + } /* Generic.EmphStrong */ + body[data-theme="dark"] .highlight .gr { + color: #d22323; + } /* Generic.Error */ + body[data-theme="dark"] .highlight .gh { + color: #ffffff; + font-weight: bold; + } /* Generic.Heading */ + body[data-theme="dark"] .highlight .gi { + color: #589819; + } /* Generic.Inserted */ + body[data-theme="dark"] .highlight .go { + color: #cccccc; + } /* Generic.Output */ + body[data-theme="dark"] .highlight .gp { + color: #aaaaaa; + } /* Generic.Prompt */ + body[data-theme="dark"] .highlight .gs { + color: #d0d0d0; + font-weight: bold; + } /* Generic.Strong */ + body[data-theme="dark"] .highlight .gu { + color: #ffffff; + text-decoration: underline; + } /* Generic.Subheading */ + body[data-theme="dark"] .highlight .gt { + color: #d22323; + } /* Generic.Traceback */ + body[data-theme="dark"] .highlight .kc { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Constant */ + body[data-theme="dark"] .highlight .kd { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Declaration */ + body[data-theme="dark"] .highlight .kn { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Namespace */ + body[data-theme="dark"] .highlight .kp { + color: #6ebf26; + } /* Keyword.Pseudo */ + body[data-theme="dark"] .highlight .kr { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Reserved */ + body[data-theme="dark"] .highlight .kt { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Type */ + body[data-theme="dark"] .highlight .ld { + color: #d0d0d0; + } /* Literal.Date */ + body[data-theme="dark"] .highlight .m { + color: #51b2fd; + } /* Literal.Number */ + body[data-theme="dark"] .highlight .s { + color: 
#ed9d13; + } /* Literal.String */ + body[data-theme="dark"] .highlight .na { + color: #bbbbbb; + } /* Name.Attribute */ + body[data-theme="dark"] .highlight .nb { + color: #2fbccd; + } /* Name.Builtin */ + body[data-theme="dark"] .highlight .nc { + color: #71adff; + text-decoration: underline; + } /* Name.Class */ + body[data-theme="dark"] .highlight .no { + color: #40ffff; + } /* Name.Constant */ + body[data-theme="dark"] .highlight .nd { + color: #ffa500; + } /* Name.Decorator */ + body[data-theme="dark"] .highlight .ni { + color: #d0d0d0; + } /* Name.Entity */ + body[data-theme="dark"] .highlight .ne { + color: #bbbbbb; + } /* Name.Exception */ + body[data-theme="dark"] .highlight .nf { + color: #71adff; + } /* Name.Function */ + body[data-theme="dark"] .highlight .nl { + color: #d0d0d0; + } /* Name.Label */ + body[data-theme="dark"] .highlight .nn { + color: #71adff; + text-decoration: underline; + } /* Name.Namespace */ + body[data-theme="dark"] .highlight .nx { + color: #d0d0d0; + } /* Name.Other */ + body[data-theme="dark"] .highlight .py { + color: #d0d0d0; + } /* Name.Property */ + body[data-theme="dark"] .highlight .nt { + color: #6ebf26; + font-weight: bold; + } /* Name.Tag */ + body[data-theme="dark"] .highlight .nv { + color: #40ffff; + } /* Name.Variable */ + body[data-theme="dark"] .highlight .ow { + color: #6ebf26; + font-weight: bold; + } /* Operator.Word */ + body[data-theme="dark"] .highlight .pm { + color: #d0d0d0; + } /* Punctuation.Marker */ + body[data-theme="dark"] .highlight .w { + color: #666666; + } /* Text.Whitespace */ + body[data-theme="dark"] .highlight .mb { + color: #51b2fd; + } /* Literal.Number.Bin */ + body[data-theme="dark"] .highlight .mf { + color: #51b2fd; + } /* Literal.Number.Float */ + body[data-theme="dark"] .highlight .mh { + color: #51b2fd; + } /* Literal.Number.Hex */ + body[data-theme="dark"] .highlight .mi { + color: #51b2fd; + } /* Literal.Number.Integer */ + body[data-theme="dark"] .highlight .mo { + color: 
#51b2fd; + } /* Literal.Number.Oct */ + body[data-theme="dark"] .highlight .sa { + color: #ed9d13; + } /* Literal.String.Affix */ + body[data-theme="dark"] .highlight .sb { + color: #ed9d13; + } /* Literal.String.Backtick */ + body[data-theme="dark"] .highlight .sc { + color: #ed9d13; + } /* Literal.String.Char */ + body[data-theme="dark"] .highlight .dl { + color: #ed9d13; + } /* Literal.String.Delimiter */ + body[data-theme="dark"] .highlight .sd { + color: #ed9d13; + } /* Literal.String.Doc */ + body[data-theme="dark"] .highlight .s2 { + color: #ed9d13; + } /* Literal.String.Double */ + body[data-theme="dark"] .highlight .se { + color: #ed9d13; + } /* Literal.String.Escape */ + body[data-theme="dark"] .highlight .sh { + color: #ed9d13; + } /* Literal.String.Heredoc */ + body[data-theme="dark"] .highlight .si { + color: #ed9d13; + } /* Literal.String.Interpol */ + body[data-theme="dark"] .highlight .sx { + color: #ffa500; + } /* Literal.String.Other */ + body[data-theme="dark"] .highlight .sr { + color: #ed9d13; + } /* Literal.String.Regex */ + body[data-theme="dark"] .highlight .s1 { + color: #ed9d13; + } /* Literal.String.Single */ + body[data-theme="dark"] .highlight .ss { + color: #ed9d13; + } /* Literal.String.Symbol */ + body[data-theme="dark"] .highlight .bp { + color: #2fbccd; + } /* Name.Builtin.Pseudo */ + body[data-theme="dark"] .highlight .fm { + color: #71adff; + } /* Name.Function.Magic */ + body[data-theme="dark"] .highlight .vc { + color: #40ffff; + } /* Name.Variable.Class */ + body[data-theme="dark"] .highlight .vg { + color: #40ffff; + } /* Name.Variable.Global */ + body[data-theme="dark"] .highlight .vi { + color: #40ffff; + } /* Name.Variable.Instance */ + body[data-theme="dark"] .highlight .vm { + color: #40ffff; + } /* Name.Variable.Magic */ + body[data-theme="dark"] .highlight .il { + color: #51b2fd; + } /* Literal.Number.Integer.Long */ + @media (prefers-color-scheme: dark) { + body:not([data-theme="light"]) .highlight pre { + 
line-height: 125%; + } + body:not([data-theme="light"]) .highlight td.linenos .normal { + color: #aaaaaa; + background-color: transparent; + padding-left: 5px; + padding-right: 5px; + } + body:not([data-theme="light"]) .highlight span.linenos { + color: #aaaaaa; + background-color: transparent; + padding-left: 5px; + padding-right: 5px; + } + body:not([data-theme="light"]) .highlight td.linenos .special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; + } + body:not([data-theme="light"]) .highlight span.linenos.special { + color: #000000; + background-color: #ffffc0; + padding-left: 5px; + padding-right: 5px; + } + body:not([data-theme="light"]) .highlight .hll { + background-color: #404040; + } + body:not([data-theme="light"]) .highlight { + background: #202020; + color: #d0d0d0; + } + body:not([data-theme="light"]) .highlight .c { + color: #ababab; + font-style: italic; + } /* Comment */ + body:not([data-theme="light"]) .highlight .err { + color: #a61717; + background-color: #e3d2d2; + } /* Error */ + body:not([data-theme="light"]) .highlight .esc { + color: #d0d0d0; + } /* Escape */ + body:not([data-theme="light"]) .highlight .g { + color: #d0d0d0; + } /* Generic */ + body:not([data-theme="light"]) .highlight .k { + color: #6ebf26; + font-weight: bold; + } /* Keyword */ + body:not([data-theme="light"]) .highlight .l { + color: #d0d0d0; + } /* Literal */ + body:not([data-theme="light"]) .highlight .n { + color: #d0d0d0; + } /* Name */ + body:not([data-theme="light"]) .highlight .o { + color: #d0d0d0; + } /* Operator */ + body:not([data-theme="light"]) .highlight .x { + color: #d0d0d0; + } /* Other */ + body:not([data-theme="light"]) .highlight .p { + color: #d0d0d0; + } /* Punctuation */ + body:not([data-theme="light"]) .highlight .ch { + color: #ababab; + font-style: italic; + } /* Comment.Hashbang */ + body:not([data-theme="light"]) .highlight .cm { + color: #ababab; + font-style: italic; + } /* Comment.Multiline */ + 
body:not([data-theme="light"]) .highlight .cp { + color: #ff3a3a; + font-weight: bold; + } /* Comment.Preproc */ + body:not([data-theme="light"]) .highlight .cpf { + color: #ababab; + font-style: italic; + } /* Comment.PreprocFile */ + body:not([data-theme="light"]) .highlight .c1 { + color: #ababab; + font-style: italic; + } /* Comment.Single */ + body:not([data-theme="light"]) .highlight .cs { + color: #e50808; + font-weight: bold; + background-color: #520000; + } /* Comment.Special */ + body:not([data-theme="light"]) .highlight .gd { + color: #d22323; + } /* Generic.Deleted */ + body:not([data-theme="light"]) .highlight .ge { + color: #d0d0d0; + font-style: italic; + } /* Generic.Emph */ + body:not([data-theme="light"]) .highlight .ges { + color: #d0d0d0; + font-weight: bold; + font-style: italic; + } /* Generic.EmphStrong */ + body:not([data-theme="light"]) .highlight .gr { + color: #d22323; + } /* Generic.Error */ + body:not([data-theme="light"]) .highlight .gh { + color: #ffffff; + font-weight: bold; + } /* Generic.Heading */ + body:not([data-theme="light"]) .highlight .gi { + color: #589819; + } /* Generic.Inserted */ + body:not([data-theme="light"]) .highlight .go { + color: #cccccc; + } /* Generic.Output */ + body:not([data-theme="light"]) .highlight .gp { + color: #aaaaaa; + } /* Generic.Prompt */ + body:not([data-theme="light"]) .highlight .gs { + color: #d0d0d0; + font-weight: bold; + } /* Generic.Strong */ + body:not([data-theme="light"]) .highlight .gu { + color: #ffffff; + text-decoration: underline; + } /* Generic.Subheading */ + body:not([data-theme="light"]) .highlight .gt { + color: #d22323; + } /* Generic.Traceback */ + body:not([data-theme="light"]) .highlight .kc { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Constant */ + body:not([data-theme="light"]) .highlight .kd { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Declaration */ + body:not([data-theme="light"]) .highlight .kn { + color: #6ebf26; + font-weight: bold; + } /* 
Keyword.Namespace */ + body:not([data-theme="light"]) .highlight .kp { + color: #6ebf26; + } /* Keyword.Pseudo */ + body:not([data-theme="light"]) .highlight .kr { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Reserved */ + body:not([data-theme="light"]) .highlight .kt { + color: #6ebf26; + font-weight: bold; + } /* Keyword.Type */ + body:not([data-theme="light"]) .highlight .ld { + color: #d0d0d0; + } /* Literal.Date */ + body:not([data-theme="light"]) .highlight .m { + color: #51b2fd; + } /* Literal.Number */ + body:not([data-theme="light"]) .highlight .s { + color: #ed9d13; + } /* Literal.String */ + body:not([data-theme="light"]) .highlight .na { + color: #bbbbbb; + } /* Name.Attribute */ + body:not([data-theme="light"]) .highlight .nb { + color: #2fbccd; + } /* Name.Builtin */ + body:not([data-theme="light"]) .highlight .nc { + color: #71adff; + text-decoration: underline; + } /* Name.Class */ + body:not([data-theme="light"]) .highlight .no { + color: #40ffff; + } /* Name.Constant */ + body:not([data-theme="light"]) .highlight .nd { + color: #ffa500; + } /* Name.Decorator */ + body:not([data-theme="light"]) .highlight .ni { + color: #d0d0d0; + } /* Name.Entity */ + body:not([data-theme="light"]) .highlight .ne { + color: #bbbbbb; + } /* Name.Exception */ + body:not([data-theme="light"]) .highlight .nf { + color: #71adff; + } /* Name.Function */ + body:not([data-theme="light"]) .highlight .nl { + color: #d0d0d0; + } /* Name.Label */ + body:not([data-theme="light"]) .highlight .nn { + color: #71adff; + text-decoration: underline; + } /* Name.Namespace */ + body:not([data-theme="light"]) .highlight .nx { + color: #d0d0d0; + } /* Name.Other */ + body:not([data-theme="light"]) .highlight .py { + color: #d0d0d0; + } /* Name.Property */ + body:not([data-theme="light"]) .highlight .nt { + color: #6ebf26; + font-weight: bold; + } /* Name.Tag */ + body:not([data-theme="light"]) .highlight .nv { + color: #40ffff; + } /* Name.Variable */ + 
body:not([data-theme="light"]) .highlight .ow { + color: #6ebf26; + font-weight: bold; + } /* Operator.Word */ + body:not([data-theme="light"]) .highlight .pm { + color: #d0d0d0; + } /* Punctuation.Marker */ + body:not([data-theme="light"]) .highlight .w { + color: #666666; + } /* Text.Whitespace */ + body:not([data-theme="light"]) .highlight .mb { + color: #51b2fd; + } /* Literal.Number.Bin */ + body:not([data-theme="light"]) .highlight .mf { + color: #51b2fd; + } /* Literal.Number.Float */ + body:not([data-theme="light"]) .highlight .mh { + color: #51b2fd; + } /* Literal.Number.Hex */ + body:not([data-theme="light"]) .highlight .mi { + color: #51b2fd; + } /* Literal.Number.Integer */ + body:not([data-theme="light"]) .highlight .mo { + color: #51b2fd; + } /* Literal.Number.Oct */ + body:not([data-theme="light"]) .highlight .sa { + color: #ed9d13; + } /* Literal.String.Affix */ + body:not([data-theme="light"]) .highlight .sb { + color: #ed9d13; + } /* Literal.String.Backtick */ + body:not([data-theme="light"]) .highlight .sc { + color: #ed9d13; + } /* Literal.String.Char */ + body:not([data-theme="light"]) .highlight .dl { + color: #ed9d13; + } /* Literal.String.Delimiter */ + body:not([data-theme="light"]) .highlight .sd { + color: #ed9d13; + } /* Literal.String.Doc */ + body:not([data-theme="light"]) .highlight .s2 { + color: #ed9d13; + } /* Literal.String.Double */ + body:not([data-theme="light"]) .highlight .se { + color: #ed9d13; + } /* Literal.String.Escape */ + body:not([data-theme="light"]) .highlight .sh { + color: #ed9d13; + } /* Literal.String.Heredoc */ + body:not([data-theme="light"]) .highlight .si { + color: #ed9d13; + } /* Literal.String.Interpol */ + body:not([data-theme="light"]) .highlight .sx { + color: #ffa500; + } /* Literal.String.Other */ + body:not([data-theme="light"]) .highlight .sr { + color: #ed9d13; + } /* Literal.String.Regex */ + body:not([data-theme="light"]) .highlight .s1 { + color: #ed9d13; + } /* Literal.String.Single */ + 
body:not([data-theme="light"]) .highlight .ss { + color: #ed9d13; + } /* Literal.String.Symbol */ + body:not([data-theme="light"]) .highlight .bp { + color: #2fbccd; + } /* Name.Builtin.Pseudo */ + body:not([data-theme="light"]) .highlight .fm { + color: #71adff; + } /* Name.Function.Magic */ + body:not([data-theme="light"]) .highlight .vc { + color: #40ffff; + } /* Name.Variable.Class */ + body:not([data-theme="light"]) .highlight .vg { + color: #40ffff; + } /* Name.Variable.Global */ + body:not([data-theme="light"]) .highlight .vi { + color: #40ffff; + } /* Name.Variable.Instance */ + body:not([data-theme="light"]) .highlight .vm { + color: #40ffff; + } /* Name.Variable.Magic */ + body:not([data-theme="light"]) .highlight .il { + color: #51b2fd; + } /* Literal.Number.Integer.Long */ + } } -} \ No newline at end of file diff --git a/docs/build/html/about.html b/docs/build/html/about.html index d4a5bb72..e1ecdb5d 100644 --- a/docs/build/html/about.html +++ b/docs/build/html/about.html @@ -1,251 +1,526 @@ - + - - - + + + + + + + + + + - - Skills Extractor - Skills Extractor v1.0.1 documentation - - - - - + + Skills Extractor - Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - - - - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

Skills Extractor#

-
    -
  • Installation

  • -
  • Using Nesta’s Skills Extractor library

  • -
  • Development

  • -
-
-

Welcome to Nesta’s Skills Extractor Library#

-

Welcome to the documentation of Nesta’s skills extractor library.

-

This page contains information on how to install and use Nesta’s skills extraction library. The skills library allows you to extract skills phrases from job advertisement texts and maps them onto a skills taxonomy of your choice.

-

-

We currently support three different taxonomies to map onto: the European Commission’s European Skills, Competences, and Occupations (ESCO), Lightcast’s Open Skills and a “toy” taxonomy developed internally for the purpose of testing.

-

If you’d like to learn more about the models used in the library, please refer to the model card page.

-

You may also want to read more about the wider project by reading:

-
    -
  1. Our Introduction blog

  2. -
  3. Our interactive analysis blog

  4. -
-
-
-

Installation #

-

You can use pip to install the library:

-
pip install ojd-daps-skills
-
-
-

You will also need to download spaCy’s en_core_web_sm model:

-
python -m spacy download en_core_web_sm
-
-
-
-

AWS CLI#

-

When the package is first used it will automatically download a folder of neccessary data and models. This file is ~ 1GB. Although you don’t need to have AWS credentials for this to work, you will need to download the AWS CLI.

-
-
-
-

TL;DR: Using Nesta’s Skills Extractor library #

-

The library supports three key skills extraction functionalities :

-
    -
  1. Extract AND map skills to a taxonomy of your choice;

  2. -
  3. Extract skills from job adverts;

  4. -
  5. Map a list of skills to a taxonomy of your choice.

  6. -
-

The option local=False can only be used by those with access to Nesta’s S3 bucket.

-
-

1. Extract AND map skills#

-

If you would like to extract AND map skills in one step, you are able to do so with the extract_skills method.

-
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
+      
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+
+

+ Skills Extractor# +

+ +
+

+ Welcome to Nesta’s Skills Extractor Library# +

+

+ Welcome to the documentation of Nesta’s skills extractor + library. +

+

+ This page contains information on how to install and use + Nesta’s skills extraction library. The skills library allows + you to extract skills phrases from job advertisement texts + and maps them onto a skills taxonomy of your choice. +

+

+

+ We currently support three different taxonomies to map onto: + the + European Commission’s European Skills, Competences, and + Occupations (ESCO), + Lightcast’s Open Skills + and a “toy” taxonomy developed internally for the purpose of + testing. +

+

+ If you’d like to learn more about the models used in the + library, please refer to the + model card page. +

+

+ You may also want to read more about the wider project by + reading: +

+
    +
  1. +

    + Our + Introduction blog +

    +
  2. +
  3. +

    + Our + interactive analysis blog +

    +
  4. +
+
+
+

+ Installation # +

+

You can use pip to install the library:

+
+
+
pip install ojd-daps-skills
+
+
+
+

+ Note that this package was developed on MacOS and tested on + Ubuntu. Changes have been made to be compatible on a Windows + system but are not tested and cannot be guaranteed. +

+

+ When the package is first used it will automatically + download a folder of necessary data and models. (~1GB) +

+
+
+

+ TL;DR: Using Nesta’s Skills Extractor library + # +

+

+ The library supports three key skills extraction + functionalities : +

+
    +
  1. +

    + Extract AND map skills to a taxonomy of your choice; +

    +
  2. +
  3. Extract skills from job adverts;

  4. +
  5. +

    Map a list of skills to a taxonomy of your choice.

    +
  6. +
+

+ The option + local=False + can only be used by those with access to Nesta’s S3 bucket. +

+
+

+ 1. Extract AND map skills# +

+

+ If you would like to extract AND map skills in one step, + you are able to do so with the + extract_skills + method. +

+
+
+
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
 
 es = ExtractSkills(config_name="extract_skills_toy", local=True) #instantiate with toy taxonomy configuration file
 
@@ -257,18 +532,34 @@ 

1. Extract AND map skills] #toy job advert examples job_skills_matched = es.extract_skills(job_adverts) #match and extract skills to toy taxonomy -

-
-

The outputs are as follows:

-
job_skills_matched
+
+
+
+

The outputs are as follows:

+
+
+
job_skills_matched
 >>> [{'SKILL': [('communication skills', ('communication, collaboration and creativity', 'S1')), ('maths skills', ('working with computers', 'S5'))]}, {'SKILL': [('Excel skills', ('working with computers', 'S5')), ('presentation skills', ('communication, collaboration and creativity', 'S1'))]}]
-
-
-
-
-

2. Extract skills#

-

You can simply extract skills from a job advert or list of job adverts:

-
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
+
+
+
+ +
+

+ 2. Extract skills# +

+

+ You can simply extract skills from a job advert or list of + job adverts: +

+
+
+
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
 
 es = ExtractSkills(config_name="extract_skills_toy", local=True) #instantiate with toy taxonomy configuration file
 
@@ -280,19 +571,43 @@ 

2. Extract skills] #toy job advert examples predicted_skills = es.get_skills(job_adverts) #extract skills from list of job adverts -

-
-

The outputs are as follows:

-
predicted_skills
+
+
+
+

The outputs are as follows:

+
+
+
predicted_skills
 [{'EXPERIENCE': [], 'SKILL': ['communication skills', 'maths skills'], 'MULTISKILL': []}, {'EXPERIENCE': [], 'SKILL': ['Excel skills', 'presentation skills'], 'MULTISKILL': []}]
 
-
-
-
-
-

3. Map skills#

-

You can map either the predicted_skills output from get_stills or simply map a list of skills to a taxonomy of your choice. In this instance, we map a list of skills:

-
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
+
+
+
+ +
+

+ 3. Map skills# +

+

+ You can map either the + predicted_skills + output from + get_skills + or simply map a list of skills to a taxonomy of your + choice. In this instance, we map a list of skills: +

+
+
+
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
 
 es = ExtractSkills(config_name="extract_skills_toy", local=True) #instantiate with toy taxonomy configuration file
 
@@ -305,180 +620,445 @@ 

3. Map skills] #list of skills (and/or multiskills) to be matched skills_list_matched = es.map_skills(skills_list) #match formatted skills to toy taxonomy -

-
-

The outputs are as follows:

-
skills_list_matched
+
+
+
+

The outputs are as follows:

+
+
+
skills_list_matched
 >>> [{'SKILL': [('Excel skills', ('working with computers', 'S5')), ('Communication', ('use communication techniques', 'cdef')), ('working with computers', ('communication, collaboration and creativity', 'S1'))]}]
-
-
-
-
-
-

App#

-

If you would like to demo the library using a front end, we have also built a streamlit app that allows you to extract skills for a given text. The app allows you to paste a job advert of your choice, extract and map skills onto any of the configurations: extract_skills_lightcast and extract_skills_esco.

-

nesta_esco

-
-
-

Development #

-

If you’d like to modify or develop the source code you can clone it by first running:

-
git clone git@github.com:nestauk/ojd_daps_skills.git
-
-
-
-

Setup#

-
    -
  • Meet the data science cookiecutter requirements, in brief:

    -
      -
    • Install: direnv and conda

    • -
    -
  • -
  • Create a blank cookiecutter conda log file:

    -
      -
    • mkdir .cookiecutter/state

    • -
    • touch .cookiecutter/state/conda-create.log

    • -
    -
  • -
  • Run make install to configure the development environment

  • -
  • Download spacy model:

    -
      -
    • python -m spacy download en_core_web_sm

    • -
    -
  • -
-

If you don’t have the AWS CLI installed - you can download a zipped folder of the data by clicking here. After downloading and unzipping, it is important that this folder is moved to the project’s parent folder - i.e. ojd_daps_skills/.

-
-
-

Project structure#

-

The project is split into three core pipeline folders:

-
    -
  • skill_ner - Training a Named Entity Recognition (NER) model to extract skills from job adverts.

  • -
  • skill_ner_mapping - Matching skills to an existing skills taxonomy using semantic similarity.

  • -
  • extract_skills - User friendly functionality to extract and map skills from job adverts.

  • -
-

Much more about these steps can be found in each of the pipeline folder READMEs.

-

- -An example of extracting skills and mapping them to the ESCO taxonomy.

-
-
-

Testing#

-

Some functions have tests, these can be checked by running

-
pytest
-
-
-
-
-

Analysis#

-

Various pieces of analysis are done in the analysis folder. These require access to various datasets from Nesta’s private S3 bucket and are therefore only designed for internal Nesta use.

-
-
-

Contributor guidelines#

-

The technical and working style guidelines can be found here.

-
-

This project was made possible via funding from the Economic Statistics Centre of Excellence

-

Project template is based on Nesta’s data science project template -(Read the docs here). -

-
-
-
+ +
+
+ + +
+

+ App# +

+

+ If you would like to demo the library using a front end, we + have also + built a streamlit app + that allows you to extract skills for a given text. The app + allows you to paste a job advert of your choice, extract and + map skills onto any of the configurations: + extract_skills_lightcast + and + extract_skills_esco. +

+

+ nesta_esco +

+
+
+

+ Development # +

+

+ If you’d like to modify or develop the source code you can + clone it by first running: +

+
+
+
git clone git@github.com:nestauk/ojd_daps_skills.git
+
+
+
+
+

+ Setup# +

+
    +
  • +

    + Meet the data science cookiecutter + requirements, in brief: +

    +
      +
    • +

      + Install: + direnv + and + conda +

      +
    • +
    +
  • +
  • +

    Create a blank cookiecutter conda log file:

    +
      +
    • +

      + mkdir + .cookiecutter/state +

      +
    • +
    • +

      + touch + .cookiecutter/state/conda-create.log +

      +
    • +
    +
  • +
  • +

    + Run + make + install + to configure the development environment +

    +
  • +
+
+
+

+ Project structure# +

+

+ The project is split into three core pipeline folders: +

+
    +
  • +

    + skill_ner + - Training a Named Entity Recognition (NER) model to + extract skills from job adverts. +

    +
  • +
  • +

    + skill_ner_mapping + - Matching skills to an existing skills taxonomy using + semantic similarity. +

    +
  • +
  • +

    + extract_skills + - User friendly functionality to extract and map + skills from job adverts. +

    +
  • +
+

+ Much more about these steps can be found in each of the + pipeline folder READMEs. +

+

+ + + An example of extracting skills and mapping them to the + ESCO taxonomy. +

+
+
+

+ Testing# +

+

+ Some functions have tests, these can be checked by running +

+
+
+
pytest
+
+
+
+
+
+

+ Analysis# +

+

+ Various pieces of analysis are done in the + analysis folder. These require access to various datasets from Nesta’s + private S3 bucket and are therefore only designed for + internal Nesta use. +

+
+
+

+ Contributor guidelines# +

+

+ The technical and working style guidelines can be found + here. +

+

+ If contributing, changes will need to be pushed to a new + branch in order for our code checks to be triggered. +

+
+

+

+ This project was made possible via funding from the + Economic Statistics Centre of Excellence +

+

+

+

+ Project template is based on + Nesta’s data science project template + (Read the docs here). +

+

+
+
+ + +
+ -
-
- - - -
-
+ + - - \ No newline at end of file + + diff --git a/docs/build/html/custom_usage.html b/docs/build/html/custom_usage.html index dbb3a4fe..f1e48ec0 100644 --- a/docs/build/html/custom_usage.html +++ b/docs/build/html/custom_usage.html @@ -1,443 +1,1248 @@ - + - - - + + + + + + + + + + - - Custom Usage - Skills Extractor v1.0.1 documentation - - - - - + + Custom Usage - Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - - - - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

Custom Usage#

-

extract_skills.py combines the prediction of skills using code from skill_ner with the mapping of skills to a taxonomy using code from skill_ner_mapping.

-

This page explains more about the custom usage of this class including creating a custom config file and mapping to another taxonomy. To do this you will need to clone the repo. Please refer to the main documentation page for the development setup instructions for this package and the core usage.

-
-

Configuration files #

-

Core to the Extract Skills package, and in particular the taxonomy mapping functionality, is config files. These are included in the instantiation of the class, as so:

-
es = ExtractSkills(config_name="extract_skills_toy")
-
-
-
-

Predefined configurations #

-

There are currently three configurations available for running the skills extraction algorithm. These configurations contain information about parameter values, trained models and directory locations of stored data.

-
    -
  1. extract_skills_toy - Configuration for a toy taxonomy example, useful for testing.

  2. -
  3. extract_skills_esco - Configuration for extracting skills and matching them to the ESCO skills taxonomy. This configuration is correct to v1.1.1 of ESCO.

  4. -
  5. extract_skills_lightcast - Configuration for extracting skills and matching them to the Lightcast skills taxonomy. This configuration is correct to the version of Lightcast as of 22/11/22.

  6. -
-

If you are mapping to the ESCO skills taxonomy using extract_skills_esco.yaml, we reviewed the top 100 skills and ultimately hard coded 43 of the most common skills which were not well matched from a random sample of 100,000 job adverts in the Open Jobs Observatory project with the most appropriate skills from the taxonomy.

-
-
-

Configuration definitions #

-

Every predefined configuration includes the following parameters:

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Parameter

Description

ner_model_path: str

The relative path to the NER model folder used to predict skill spans in job adverts.

taxonomy_name: str

The name of the taxonomy to map onto.

taxonomy_path: str

The relative path to the formatted taxonomy. Formatted taxonomy must be in .csv format.

clean_job_ads: bool, default=True

Whether to perform light text cleaning on job adverts or not. Text cleaning includes detecting and splitting camelcase in job adverts, replacing various characters and converting bullet points to full stops. Defaults to True.

min_multiskill_length: int

The minimum character length a predicted multi-skill sentence must be to apply splitting rules to.

(optional) taxonomy_embedding_file_name: str

The relative path to a taxonomy embedding file if it exists. If left unset the embeddings will be generated when the code is run.

(optional) prev_skill_matches_file_name: str

The relative path to a previous skill matches file if it exists.

(optional) hard_labelled_skills_file_name: str

The relative path to a hard labelled skills file if it exists.

(optional) hier_name_mapper_file_name: str

The relative path to a hierarchy name mapper file if it exists.

num_hier_levels: int

The number of levels in the skills taxonomy hierarchy. This can be set to 0 if the taxonomy has no levels.

skill_type_dict: dict

A dictionary that defines skill types and hierarchy types.

{ "skill_types": [A list of the values of the 'type' column which code skills], "hier_types": [A list of the values of the 'type' column which code skill groups, these need to be in order from least to most granular]}

match_thresholds_dict: dict

A dictionary that defines thresholds at each level of the skills taxonomy hierarchy. For example,

{"skill_match_thresh": 0.7, "top_tax_skills": {1: 0.5, 2: 0.5, 3: 0.5},“max_share”: {1: 0, 2: 0.2, 3: 0.2}}

See Model Card: Skills to Taxonomy Mapping for the details of what these thresholds represent.

skill_name_col: str

The name of the skill/hierarchy level description text column in formatted taxonomy .csv.

skill_id_col: str

Name of skill id column in formatted taxonomy .csv. Each row should contain a unique ID for the skill/hierarchy.

(optional) skill_hier_info_col: str

Name of hierarchy info column in formatted taxonomy .csv. The hierarchy info column contains which hierarchy levels a skill is in (from least to most granular). If not a skill, then NA.

skill_type_col: str

Name of what column name the skill/hier description is from (category, subcategory) in formatted taxonomy .csv.

-
-
-
-
-

Mapping to your own taxonomy #

-

Although we currently support three configurations for running the skills extraction algorithm, you are also able to map extracted skills onto a taxonomy of your choice by defining your own configuration file. In order to map skills onto your own taxonomy you must:

-
    -
  1. Format your taxonomy

  2. -
  3. Define your own configuration file

  4. -
-
-

Format your taxonomy #

-

You must also format your taxonomy in such a way that looks like the following:

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

skill_type_col

skill_name_col

skill_id_col

(optional) skill_hier_info_col

skill

use spreadsheets software

abcd

[["S", "S5", "S5.6", "S5.6.1"], ["S", "S5", "S5.5", "S5.5.2"]]

skill

use communication techniques

cdef

[["S", "S1", "S1.0", "S1.0.0"]]

skill_group_3

communication, collaboration and creativity

S1.0.0

NaN

skill_group_3

mathematics

S1.2.1

NaN

skill_group_2

presenting information

S1.4

NaN

-
-

You will see the skill_type_col column contains skills and skill groups. This is because we try to match to individual skills, but if this isn’t possible we then try to match to a skill group in the taxonomy (if given).

-

For rows which correspond to individual skills (rather than skill groups) the skill_hier_info_col column values show all the parts of the taxonomy where this skill is situated. It is helpful to link these codes to names, so you may also want to create a taxonomy name mapper file for this data, e.g. {"S1.2.1": "mathematics"}. For rows which correspond to skill groups (rather than individual skills) the skill_hier_info_col column will be blank since the hierarchy information is contained in the skill_id_col column. The contents of skill_hier_info_col need to be a list of lists, or a list of strings, but not a combination of both.

-

The number of levels in the taxonomy will correspond to the length of the lists in the skill_hier_info_col column.

-

Although we don’t provide guidance on re-formatting your taxonomy, we have re-formatted the ESCO taxonomy to this format in this script and we have re-formatted the Lightcast taxonomy to this format in this script.

-
-
-

Define your own configuration file #

-

Create your own configuration yaml file in the format extract_skills_taxonomy_name.yaml. This config should contain all the parameters as described in Predefined configuration definitions. The file should be saved to your_current_path/ojd_daps_skills/config/.

-

We provide a template config file here.

-

It is important that the list given in skill_type_dict['hier_types'] is in the order from the least to most granular parts of the taxonomy. For example, in the ESCO taxonomy we match against the second and third skill group levels, so this is set to ["level_2", "level_3"] i.e. level 3 is more granular than level 2, where level 2 skill groups > level 3 skill groups > individual skill.

-

Now you can use your custom taxonomy as:

-
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
+      
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+
+

+ Custom Usage# +

+

+ extract_skills.py + combines the prediction of skills using code from + skill_ner + with the mapping of skills to a taxonomy using code from + skill_ner_mapping. +

+

+ This page explains more about the custom usage of this class + including creating a custom config file and mapping to another + taxonomy. To do this you will need to clone the repo. Please + refer to the main documentation page for the + development setup instructions + for this package and the core usage. +

+
+

+ Configuration files # +

+

+ Core to the Extract Skills package, and in particular the + taxonomy mapping functionality, are config files. These are + included in the instantiation of the class, like so: +

+
+
+
es = ExtractSkills(config_name="extract_skills_toy")
+
+
+
+
+

+ Predefined configurations # +

+

+ There are currently three configurations available for + running the skills extraction algorithm. These + configurations contain information about parameter values, + trained models and directory locations of stored data. +

+
    +
  1. +

    + extract_skills_toy + - Configuration for a toy taxonomy example, useful for + testing. +

    +
  2. +
  3. +

    + extract_skills_esco + - Configuration for extracting skills and matching + them to the ESCO skills taxonomy. This configuration + is correct to v1.1.1 of ESCO. +

    +
  4. +
  5. +

    + extract_skills_lightcast + - Configuration for extracting skills and matching + them to the Lightcast skills taxonomy. This + configuration is correct to the version of Lightcast + as of 22/11/22. +

    +
  6. +
+

+ If you are mapping to the ESCO skills taxonomy using + extract_skills_esco.yaml, note that we reviewed the top 100 skills and ultimately hard + coded 43 of the most common skills from a random sample of 100,000 job adverts in the + Open Jobs Observatory + project which were not well + matched, mapping them to the most appropriate skills from the + taxonomy. +

+
+
+

+ Configuration definitions # +

+

+ Every predefined configuration includes the following + parameters: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Parameter

Description

+

+ ner_model_path: str +

+
+

+ The relative path to the NER model folder used + to predict skill spans in job adverts. +

+
+

+ taxonomy_name: str +

+
+

The name of the taxonomy to map onto.

+
+

+ taxonomy_path: str +

+
+

+ The relative path to the formatted taxonomy. + Formatted taxonomy must be in + .csv + format. +

+
+

+ clean_job_ads: bool, default=True +

+
+

+ Whether to perform light text cleaning on job + adverts or not. Text cleaning includes detecting + and splitting camelcase in job adverts, + replacing various characters and converting + bullet points to full stops. Defaults to True. +

+
+

+ min_multiskill_length: int +

+
+

+ The minimum character length a predicted + multi-skill sentence must be to apply splitting + rules to. +

+
+

+ (optional) + taxonomy_embedding_file_name: str +

+
+

+ The relative path to a taxonomy embedding file + if it exists. If left unset the embeddings will + be generated when the code is run. +

+
+

+ (optional) + prev_skill_matches_file_name: str +

+
+

+ The relative path to a previous skill matches + file if it exists. +

+
+

+ (optional) + hard_labelled_skills_file_name: str +

+
+

+ The relative path to a hard labelled skills file + if it exists. +

+
+

+ (optional) + hier_name_mapper_file_name: str +

+
+

+ The relative path to a hierarchy name mapper + file if it exists. +

+
+

+ num_hier_levels: int +

+
+

+ The number of levels in the skills taxonomy + hierarchy. This can be set to 0 if the taxonomy + has no levels. +

+
+

+ skill_type_dict: dict +

+
+

+ A dictionary that defines skill types and + hierarchy types.

+ { + "skill_types": + [A + list + of + the + values + of + the + 'type' + column + which + code + skills], + "hier_types": + [A + list + of + the + values + of + the + 'type' + column + which + code + skill + groups, + these + need + to + be + in + order + from + least + to + most + granular]} +

+
+

+ match_thresholds_dict: dict +

+
+

+ A dictionary that defines thresholds at each + level of the skills taxonomy hierarchy. For + example,
+
+ {"skill_match_thresh": + 0.7, + "top_tax_skills": + {1: + 0.5, + 2: + 0.5, + 3: + 0.5}, + "max_share": + {1: + 0, + 2: + 0.2, + 3: + 0.2}}
+
+ See + Model Card: Skills to Taxonomy + Mapping + for the details of what these thresholds + represent. +

+
+

+ skill_name_col: str +

+
+

+ The name of the skill/hierarchy level + description text column in formatted taxonomy + .csv. +

+
+

+ skill_id_col: str +

+
+

+ Name of skill id column in formatted taxonomy + .csv. Each row should contain a unique ID for the + skill/hierarchy. +

+
+

+ (optional) + skill_hier_info_col: str +

+
+

+ Name of hierarchy info column in formatted + taxonomy + .csv. The hierarchy info column contains which + hierarchy levels a skill is in (from least to + most granular). If not a skill, then NA. +

+
+

+ skill_type_col: str +

+
+

+ Name of the column which records where the skill/hier + description is from (e.g. category, subcategory) in + formatted taxonomy + .csv. +

+
+
+
+
+
+

+ Mapping to your own taxonomy # +

+

+ Although we currently support three configurations for + running the skills extraction algorithm, you are also able + to map extracted skills onto a taxonomy of your choice by + defining your own configuration file. In order to map skills + onto your own taxonomy you must: +

+
    +
  1. Format your taxonomy

  2. +
  3. Define your own configuration file

  4. +
+
+

+ Format your taxonomy # +

+

+ You must also format your taxonomy in such a way that it + looks like the following: +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

skill_type_col

skill_name_col

skill_id_col

+

(optional) skill_hier_info_col

+

skill

use spreadsheets software

abcd

+

+ [["S", + "S5", + "S5.6", + "S5.6.1"], + ["S", + "S5", + "S5.5", + "S5.5.2"]] +

+

skill

use communication techniques

cdef

+

+ [["S", + "S1", + "S1.0", + "S1.0.0"]] +

+

skill_group_3

+

communication, collaboration and creativity

+

S1.0.0

NaN

skill_group_3

mathematics

S1.2.1

NaN

skill_group_2

presenting information

S1.4

NaN

+
+

+ You will see the + skill_type_col + column contains skills and skill groups. This is because + we try to match to individual skills, but if this isn’t + possible we then try to match to a skill group in the + taxonomy (if given). +

+

+ For rows which correspond to individual skills (rather + than skill groups) the + skill_hier_info_col + column values show all the parts of the taxonomy where + this skill is situated. It is helpful to link these codes + to names, so you may also want to create a taxonomy name + mapper file for this data, e.g. + {"S1.2.1": + "mathematics"}. For rows which correspond to skill groups (rather than + individual skills) the + skill_hier_info_col + column will be blank since the hierarchy information is + contained in the + skill_id_col + column. The contents of + skill_hier_info_col + need to be a list of lists, or a list of strings, but not + a combination of both. +

+

+ The number of levels in the taxonomy will correspond to + the length of the lists in the + skill_hier_info_col + column. +

+

+ Although we don’t provide guidance on re-formatting your + taxonomy, we have re-formatted the ESCO taxonomy to this + format in + this script + and we have re-formatted the Lightcast taxonomy to this + format in + this script. +

+
+
+

+ Define your own configuration file + # +

+

+ Create your own configuration + yaml + file in the format + extract_skills_taxonomy_name.yaml. This config should contain all the parameters as + described in + Predefined configuration definitions. The file should be saved to + your_current_path/ojd_daps_skills/config/. +

+

+ We provide a template config file + here. +

+

+ It is important that the list given in + skill_type_dict['hier_types'] + is in the order from the least to most granular parts of + the taxonomy. For example, in the ESCO taxonomy we match + against the second and third skill group levels, so this + is set to + ["level_2", + "level_3"] + i.e. level 3 is more granular than level 2, where + level + 2 + skill + groups + > + level + 3 + skill + groups + > + individual + skill. +

+

Now you can use your custom taxonomy as:

+
+
+
from ojd_daps_skills.pipeline.extract_skills.extract_skills import ExtractSkills #import the module
 
 es = ExtractSkills(config_name="my_custom_config_name", local=True)
 
 es.load()
 
-
-
-
-
-
+ +
+
+ + + + +
+ -
-
- - - -
-
+ + - - \ No newline at end of file + + diff --git a/docs/build/html/extract_skills.html b/docs/build/html/extract_skills.html index 170f4e04..1f163e59 100644 --- a/docs/build/html/extract_skills.html +++ b/docs/build/html/extract_skills.html @@ -1,398 +1,1119 @@ - + - - - + + + + + + + + + + - - The ExtractSkills class - Skills Extractor v1.0.1 documentation - - - - - + + + The ExtractSkills class - Skills Extractor v1.0.1 documentation + + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - + + + + - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

The ExtractSkills class#

-
-
-class ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills(config_name='extract_skills_toy', local=True, verbose=True, multi_process=False)[source]#
-

Class to extract skills from job adverts and map them to a skills taxonomy.

-
-
Parameters
-
    -
  • config_path (str) – The file name for the config file to be used, defaults to “extract_skills_toy”

  • -
  • local (bool) – Whether you want to load data from local files (True, if not found they will be downloaded from a public source) or via Nesta’s private s3 bucket (False, needs access), defaults to True

  • -
  • verbose (bool) – Whether to limit the number of logging messages (True) or not (False, good for debugging), defaults to True

  • -
  • multi_process (bool) – Whether to use multiprocessing (True) or not (False), defaults to False

  • -
-
-
-
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+
+

+ The + ExtractSkills + class# +

+
+
+ class ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills(config_name='extract_skills_toy', + local=True, + verbose=True, + multi_process=False)[source]# +
+
+

+ Class to extract skills from job adverts and map them to a + skills taxonomy. +

+
+
+ Parameters: +
+
+
    +
  • +

    + config_name (str) – The + file name for the config file to be used, defaults + to “extract_skills_toy” +

    +
  • +
  • +

    + local (bool) – Whether + you want to load data from local files (True, if + not found they will be downloaded from a public + source) or via Nesta’s private s3 bucket (False, + needs access), defaults to True +

    +
  • +
  • +

    + verbose (bool) – Whether + to limit the number of logging messages (True) or + not (False, good for debugging), defaults to True +

    +
  • +
  • +

    + multi_process (bool) – + Whether to use multiprocessing (True) or not + (False), defaults to False +

    +
  • +
+
+
+
+
-
-
-ExtractSkills.extract_skills(job_adverts_skills: Union[str, List[str]], format_skills=False)[source]#
-

Extract skills from job adverts using a trained NER model and map them to a taxonomy - combines both get_skills and extract_skills. Experiences will also be extracted, but not mapped to a taxonomy. It can also take as input a list of -skills and map them to a taxonomy if format_skills is set to True.

-
-
Parameters
-
    -
  • job_adverts_skills (str or list of strings) – The text of a job advert, a list of job adverts texts, or a list of skills (if format_skills=True)

  • -
  • format_skills (bool) – If the input is a list of skills (rather than job adverts) then this needs to be set to True in order to format them correctly, default to False.

  • -
-
-
Returns
-

A list of dictionaries for each job advert containing the skill and experience entities, and for every skill entity where it maps to in the taxonomy. The output combines both multiskill and skill entities together in the “SKILL” key. Each dictionary is in the format {‘SKILL’: [(skill_entity,(taxonomy_skill_name, taxonomy_skill_id)), …]}, ‘EXPERIENCE’: […]]

-
-
Return type
-

list of dictionaries for each job advert.

-
-
-
+
+
+ ExtractSkills.load(taxonomy_embedding_file_name: Optional[str] = None, + prev_skill_matches_file_name: Optional[str] = None, + hard_labelled_skills_name: Optional[str] = None, + hier_name_mapper_file_name: Optional[str] = None)[source]# +
+
+

+ Loads necessary datasets (formatted taxonomy, hard + labelled skills, previously matched skills, taxonomy + embeddings), JobNER skills extraction class and + SkillMapper skill mapper class. +

+
+
+ Parameters: +
+
+
    +
  • +

    + taxonomy_embedding_file_name (str, optional) – The relative path + to a taxonomy embedding file if it exists. If left + unset the embeddings will be generated when the + code is run. Defaults to None. +

    +
  • +
  • +

    + prev_skill_matches_file_name (str, optional) – The relative path + to a previous skill matches file if it exists. + Defaults to None. +

    +
  • +
  • +

    + hard_labelled_skills_name (str, optional) – The relative path + to a hard labelled skills file if it exists. + Defaults to None. +

    +
  • +
  • +

    + hier_name_mapper_file_name (str, optional) – The relative path + to a hierarchy name mapper file if it exists. + Defaults to None. +

    +
  • +
+
+
+
+
-
-
-ExtractSkills.get_skills(job_adverts: Union[str, List[str]])[source]#
-

Predict skill/multiskill/experience entities using the NER model in inputted job adverts. -Multiskill entities will be split up and converted into individual skill entities where possible.

-
-
Parameters
-

job_adverts (str or list of strings) – The text of a job advert or a list of job adverts texts

-
-
Returns
-

A list of entities extracted from each job advert in the form of dictionaries {“SKILL”: [“Microsoft Excel”], “MULTISKILL”: [], “EXPERIENCE”: []}

-
-
Return type
-

list, the length is equal to the number of job adverts inputted

-
-
-
+
+
+ ExtractSkills.extract_skills(job_adverts_skills: Union[str, List[str]], + format_skills=False)[source]# +
+
+

+ Extract skills from job adverts using a trained NER model + and map them to a taxonomy - combines both get_skills and + map_skills. Experiences will also be extracted, but + not mapped to a taxonomy. It can also take as input a list + of skills and map them to a taxonomy if format_skills is + set to True. +

+
+
+ Parameters: +
+
+
    +
  • +

    + job_adverts_skills (str or list of strings) – The text of a job advert, a + list of job adverts texts, or a list of skills (if + format_skills=True) +

    +
  • +
  • +

    + format_skills (bool) – + If the input is a list of skills (rather than job + adverts) then this needs to be set to True in + order to format them correctly, defaults to False. +

    +
  • +
+
+
+ Returns: +
+
+

+ A list of dictionaries for each job advert containing + the skill and experience entities, and for every skill + entity where it maps to in the taxonomy. The output + combines both multiskill and skill entities together + in the “SKILL” key. Each dictionary is in the format + {‘SKILL’: [(skill_entity, (taxonomy_skill_name, + taxonomy_skill_id)), …], ‘EXPERIENCE’: […]} +

+
+
+ Return type: +
+
+

list of dictionaries for each job advert.

+
+
+
+
-
-
-ExtractSkills.map_skills(predicted_skills: Union[List[dict], List[str]])[source]#
-

Map skills from job advert(s) to a skills taxonomy. If predicted_skills is a list of skills, it will be formatted accordingly to -be mapped to a skills taxonomy. All multiskill entities will be mapped in the same way as skill entities are.

-
-
Parameters
-

predicted_skills (list of strings or a list of dicts) – A list of skill entities either in the form of a list of strings (assumed to be from the same job advert) or a list of the dictionaries outputted from the get_skills function.

-
-
Returns
-

A list of dictionaries for each job advert containing the skill and experience entities, and for every skill entity where it maps to in the taxonomy. Multi skill entities are treated as skill entities, and the output combines them together as one. Each dictionary is in the format {‘SKILL’: [(skill_entity,(taxonomy_skill_name, taxonomy_skill_id)), …]}, ‘EXPERIENCE’: […]]

-
-
Return type
-

list of dictionaries for each job advert.

-
-
-
+
+
+ ExtractSkills.get_skills(job_adverts: Union[str, List[str]])[source]# +
+
+

+ Predict skill/multiskill/experience entities using the NER + model in inputted job adverts. Multiskill entities will be + split up and converted into individual skill entities + where possible. +

+
+
+ Parameters: +
+
+

+ job_adverts (str or list of strings) – The + text of a job advert or a list of job adverts texts +

+
+
+ Returns: +
+
+

+ A list of entities extracted from each job advert in + the form of dictionaries {“SKILL”: [“Microsoft + Excel”], “MULTISKILL”: [], “EXPERIENCE”: []} +

+
+
+ Return type: +
+
+

+ list, the length is equal to the number of job adverts + inputted +

+
+
+
+
-
-
-ExtractSkills.format_skills(skills: List[str]) List[dict][source]#
-

Format list of skills from a single job advert to be in the format needed for mapping to a taxonomy. Also applies the -multiskill splitting to any skills predicted to be multiskills.

-
-
Parameters
-

skills (str or list of strings) – A list of skills/multiskills from the job advert or a single skill

-
-
Returns
-

The skills arranged into the format [{“SKILL”: […], “MULTISKILL”: […], “EXPERIENCE”: []}]

-
-
Return type
-

a list of length 1 containing a dictionary

-
-
-
+
+
+ ExtractSkills.map_skills(predicted_skills: Union[List[dict], List[str]])[source]# +
+
+

+ Map skills from job advert(s) to a skills taxonomy. If + predicted_skills is a list of skills, it will be formatted + accordingly to be mapped to a skills taxonomy. All + multiskill entities will be mapped in the same way as + skill entities are. +

+
+
+ Parameters: +
+
+

+ predicted_skills (list of strings or a list of dicts) – A list + of skill entities either in the form of a list of + strings (assumed to be from the same job advert) or a + list of the dictionaries outputted from the get_skills + function. +

+
+
+ Returns: +
+
+

+ A list of dictionaries for each job advert containing + the skill and experience entities, and for every skill + entity where it maps to in the taxonomy. Multi skill + entities are treated as skill entities, and the output + combines them together as one. Each dictionary is in + the format {‘SKILL’: + [(skill_entity, (taxonomy_skill_name, + taxonomy_skill_id)), …], ‘EXPERIENCE’: […]} +

+
+
+ Return type: +
+
+

list of dictionaries for each job advert.

+
+
+
+
-
+
+
+ ExtractSkills.format_skills(skills: List[str]) + + List[dict][source]# +
+
+

+ Format list of skills from a single job advert to be in + the format needed for mapping to a taxonomy. Also applies + the multiskill splitting to any skills predicted to be + multiskills. +

+
+
+ Parameters: +
+
+

+ skills (str or list of strings) – A list + of skills/multiskills from the job advert or a single + skill +

+
+
+ Returns: +
+
+

+ The skills arranged into the format [{“SKILL”: […], + “MULTISKILL”: […], “EXPERIENCE”: []}] +

+
+
+ Return type: +
+
+

a list of length 1 containing a dictionary

+
+
+
+
+ + +
+ -
-
- - - -
-
+
+ - - \ No newline at end of file + + diff --git a/docs/build/html/index.html b/docs/build/html/index.html index 57a0c155..5fa0b5cb 100644 --- a/docs/build/html/index.html +++ b/docs/build/html/index.html @@ -1,336 +1,791 @@ - + - - - + + + + + + + + + - - Skills Extractor v1.0.1 documentation - - - - - + + Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - - - - - - - - - - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
- - -
-
- + +
- -
-
+ - - \ No newline at end of file + + diff --git a/docs/build/html/labelling.html b/docs/build/html/labelling.html index babcb631..8f91bebe 100644 --- a/docs/build/html/labelling.html +++ b/docs/build/html/labelling.html @@ -1,304 +1,531 @@ - + - - - + + + + + + + + + + - - Entity Labelling - Skills Extractor v1.0.1 documentation - - - - - + + Entity Labelling - Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - + + + + - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

Entity Labelling#

-

To extract skills from job adverts we took an approach of training a named entity recognition (NER) model to predict which parts of job adverts were skills (“skill entities”) and which were experiences (“experience entities”).

-

To train the NER model we needed labelled data. First we created a random sample of job adverts and got them into a form needed for labelling using Label Studio. More about this labelling process can be found in the skill_ner pipeline.

-

There are 3 entity labels in our training data:

-
    -
  1. SKILL

  2. -
  3. MULTISKILL

  4. -
  5. EXPERIENCE

  6. -
-

The user interface for this labelling task looks like:

-

-

We tried our best to label from the start to end of each individual skill, starting at the verb (if given): -

-

Sometimes it wasn’t easy to label individual skills, for example an earlier part of the sentence might be needed to define the later part. An example of this is “Working in a team and on an individual basis” - we could label “Working in a team” as a single skill, but “on an individual basis” makes no sense without the “Working” word. In these situations we labelled the whole span as multi skills: -

-

Sometimes there were no entities to label: -

-

EXPERIENCE labels will often be followed by the word “experience” e.g. “insurance experience”, and we included some qualifications as experience, e.g. “Electrical qualifications”.

-
-

Training dataset#

-

For the current NER model, 5641 entities in 375 job adverts from our dataset of job adverts were labelled; 354 were multiskill, 4696 were skill, and 608 were experience entities. 20% of the labelled entities were held out as a test set to evaluate the models.

-
-
- -
-
-
- -
-
- -
-
+
+ - - \ No newline at end of file + + diff --git a/docs/build/html/license.html b/docs/build/html/license.html index f0e07412..121e36af 100644 --- a/docs/build/html/license.html +++ b/docs/build/html/license.html @@ -1,260 +1,434 @@ - + - - - + + + + + + + + + - - The MIT License (MIT) - Skills Extractor v1.0.1 documentation - - - - - + + The MIT License (MIT) - Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - - - - - - - - - - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

The MIT License (MIT)#

-

Copyright (c) 2022, Nesta

-

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

-

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

-

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-
- -
-
-
- -
+ +
- -
-
+ - - \ No newline at end of file + + diff --git a/docs/build/html/model_card.html b/docs/build/html/model_card.html index 0c9371ca..8f4c1b79 100644 --- a/docs/build/html/model_card.html +++ b/docs/build/html/model_card.html @@ -1,414 +1,930 @@ - + - - - + + + + + + + + + + - - Model Cards - Skills Extractor v1.0.1 documentation - - - - - + + Model Cards - Skills Extractor v1.0.1 documentation + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - + + + + - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

Model Cards#

-

This page contains information for different parts of the skills extraction and mapping pipeline. We detail the two main parts of the pipeline; the extract skills pipeline and the skills to taxonomy mapping pipeline.

-

Developed by data scientists in Nesta’s Data Analytics Practice, (last updated on 23-11-2022).

-
    -
  • Model Card: Extract Skills

  • -
  • Model Card: Skills to Taxonomy Mapping

  • -
-

-An example of extracting skills and mapping them to the ESCO taxonomy.

-
-

Model Card: Named Entity Recognition Model #

-

-The extracting skills pipeline.

-
-

Summary#

-
    -
  • Train a Named Entity Recognition (NER) spaCy component to extract skills, multiskills and experience entities from job adverts.

  • -
  • Predict whether or not a skill is a multiskill using scikit-learn’s SVM model. Features are the length of the entity; whether ‘and’ is in the entity; and whether ‘,’ is in the entity.

  • -
  • Split multiskills, where possible, based on semantic rules.

  • -
-
-
-

Training#

-
    -
  • For the NER model, 375 job adverts were labelled for skills, multiskills and experience.

  • -
  • As of 15th November 2022, 5641 entities in 375 job adverts from OJO were labelled;

  • -
  • 354 were multiskill, 4696 were skill, and 608 were experience entities. 20% of the labelled entities were held out as a test set to evaluate the models.

  • -
-

The NER model we trained used spaCy’s NER neural network architecture. Their NER architecture “features a sophisticated word embedding strategy using subword features and ‘Bloom’ embeddings, a deep convolutional neural network with residual connections, and a novel transition-based approach to named entity parsing” - more about this here.

-

You can read more about the creation of the labelling data here.

-
-
-

NER Metrics#

-
    -
  • A metric in the python library nerevaluate (read more here) was used to calculate F1, precision and recall for the NER and SVM classifier on the held-out test set. As of 15th November 2022, the results are as follows:

  • -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -

Entity

F1

Precision

Recall

Skill

0.586

0.679

0.515

Experience

0.506

0.648

0.416

All

0.563

0.643

0.500

-
-
    -
  • These metrics use partial entity matching.

  • -
  • More details of the evaluation performance across both the NER model and the SVM model can be found in outputs/models/ner_model/20220825/train_details.json

  • -
-
-
-

Multiskill Metrics#

-
    -
  • The same training data and held out test set used for the NER model was used to evaluate the SVM model. On a held out test set, the SVM model achieved 91% accuracy.

  • -
  • When evaluating the multiskill splitter algorithm rules, 253 multiskill spans were labelled as ‘good’, ‘ok’ or ‘bad’ splits. Of the 253 multiskill spans, 80 were split. Of the splits, 66% were ‘good’, 9% were ‘ok’ and 25% were ‘bad’.

  • -
  • More details of the evaluation performance across both the NER model and the SVM model can be found in outputs/models/ner_model/20220825/train_details.json

  • -
-
-
-

Caveats and Recommendations#

-
    -
  • As we take a rules based approach to splitting multiskills, many multiskills do not get split. If a multiskill is unable to be split, we still match to a taxonomy of choice. Future work should add more rules to split multiskills.

  • -
  • We deduplicate the extracted skills in the output. This means that if a job advert mentions ‘excel skills’ twice and these entities are extracted, the output will just contain “excel skills” once. However, if the string is slightly different, e.g. “excel skills” and “Excel skill”, both occurrences will be outputted.

  • -
  • Future work could look to train embeddings with job-specific texts, disambiguate acronyms and improve NER model performance.

  • -
-
-
-
-

Model Card: Skills to Taxonomy Mapping #

-

-The methodology for matching skills to the ESCO taxonomy - threshold numbers can be changed in the config file.

-
-

Summary#

-
    -
  • Match to a taxonomy based on different similarity thresholds.

  • -
  • First try to match at the most granular level of a taxonomy based on cosine similarity between embedded, extracted skill and taxonomy skills. Extracted and taxonomy skills are embedded using huggingface’s sentence-transformers/all-MiniLM-L6-v2 model.

  • -
  • If there is no close granular skill above 0.7 cosine similarity (this threshold can be changed in configuration file), we then assign the skill to different levels of the taxonomy in one of two approaches (maximum share and maximum similarity - see diagram above for details).

  • -
  • If matching to ESCO, 43 commonly occurring skills from a sample of 100,000 job adverts are hard coded.

  • -
-
-
-

Model Factors#

-

The main factors in this matching approach are: 1) the different thresholds at different levels of a taxonomy and 2) the different matching approaches.

-
-
-

Caveats and Recommendations#

-

This step does less well when:

-
    -
  • The extracted skill is a metaphor: e.g. ‘understand the bigger picture’ gets matched to ‘take pictures’

  • -
  • The extracted skill is an acronym: e.g. ‘drafting ORSAs’ gets matched to ‘fine arts’

  • -
  • The extracted skill is not a skill (poor NER model performance): e.g. ‘assist with the’ gets matched to ‘providing general assistance to people’

  • -
-

We recommend that:

-
    -
  • Skill entities might match to the same taxonomy skill; the output does not deduplicate matched skills. If deduplicating is important, you will need to deduplicate at the taxonomy level.

  • -
  • The current predefined configurations ensure that every extracted skill will be matched to a taxonomy. However, if a skill is matched to the highest skill group, we label it as ‘unmatched’. Under this definition, for ESCO we identify approximately 2% of skills as ‘unmatched’.

  • -
  • The configuration file contains the relevant thresholds for matching per taxonomy. These thresholds will need to be manually tuned based on different taxonomies.

  • -
-
-
-
- -
-
-
- -
-
- -
-
+
+ - - \ No newline at end of file + + diff --git a/docs/build/html/pipeline_summary.html b/docs/build/html/pipeline_summary.html index 72e13ff3..9aa57ed1 100644 --- a/docs/build/html/pipeline_summary.html +++ b/docs/build/html/pipeline_summary.html @@ -1,451 +1,935 @@ - + - - - + + + + + + + + + + - - Pipeline summary and metrics - Skills Extractor v1.0.1 documentation - - - - - + + + Pipeline summary and metrics - Skills Extractor v1.0.1 documentation + + + + - - + + - - - - - Contents - - - - - - Menu - - - - - - - - Expand - - - - - - Light mode - - - - - - - - - - - - - - Dark mode - - - - - - - Auto light/dark mode - - - - - - - - + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + - - - - - - + + + + - - -
-
-
- -
- -
-
- -
- -
-
- -
-
-
- - - - - Back to top - -
- -
+
+
+
+ +
+ +
+
-
-
-
-

Pipeline summary and metrics#

-

-

High level, the overall pipeline includes:

-
    -
  • Named Entity Recognition (NER) model to extract skill, multi skill or experience entities in job adverts;

  • -
  • Support Vector Machine (SVM) model to predict whether the skill entity is a skill or multiskill; if multiskill, apply rules to split multiskills into individual skill entities;

  • -
  • Embed all entities (skill and multi skill entities) and taxonomy skills using huggingface’s sentence-transformers/all-MiniLM-L6-v2 pre-trained model;

  • -
  • Map extracted skills (skill and multi skill) onto taxonomy skills using cosine similarity of embeddings.

  • -
-

For further information or feedback please contact Liz Gallagher, India Kerle or Cath Sleeman.

-
-

Intended Use#

-
    -
  • Extract skills from online job adverts and match extracted skills to a user’s skill taxonomy of choice, such as the European Commission’s European Skills, Competences, and Occupations (ESCO) or Lightcast’s Open Skills.

  • -
  • Intended users include researchers in labour statistics or related government bodies.

  • -
-
-
-

Out of Scope Uses#

-
    -
  • Out of scope is extracting and matching skills from job adverts in non-English languages; extracting and matching skills from texts other than job adverts; drawing conclusions on new, unidentified skills.

  • -
  • Skills extracted should not be used to determine skill demand without expert steer and input nor should be used for any discriminatory hiring practices.

  • -
-
-
-

Metrics#

-

There is no exact way to evaluate how well our pipeline works; however we have several proxies to better understand how our approach compares.

-
-

Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation#

-

The ESCO dataset also includes information on the essential skills per occupation. We compare ESCO’s essential skill groups per occupation with the top ESCO-mapped skill groups per occupation. We identify top skills per occupation by:

-
    -
  • Identifying occupations for which we have at least 100 job adverts;

  • -
  • Identify skills extracted at ONLY the skill level;

  • -
  • Identify a top skill threshold by calculating the 75th percentile of skill counts for a given occupation

  • -
  • Identify the % of top ESCO-mapped skill groups in ESCO’s essential skill groups per occupation

  • -
-

At a high level, we find that:

-
    -
  • 58 occupations with 100 or more job adverts were found in both ESCO and a deduplicated sample of 100,000 job adverts

  • -
  • The average # of adverts per occupation is 345.54

  • -
  • We extract essential ESCO skills, transversal skills and additional skills

  • -
  • On average, 94.5 percent of essential ESCO skill groups were also in the top skill groups extracted per occupation

  • -
  • The median percent of essential ESCO skills per occupation that were extracted by our algorithm is 97.84.

  • -
-
-
-

Comparison 2 - Degree of overlap between Lightcast’s extracted skills and our Lightcast skills#

-

We compare the Lightcast skills extracted by Lightcast’s Open Skills algorithm with those extracted by our current approach for 99 job adverts, with a minimum cosine similarity threshold between an extracted skill and taxonomy skill set to 0 to guarantee we only match at the skill level.

-

We found:

-
    -
  • We extract an average of 10.22 skills per job advert while Lightcast’s Open Skills algorithm extracts an average of 6.42 skills per job advert

  • -
  • There is no overlap for 40% of job adverts between the two approaches

  • -
  • Of the job adverts where there is overlap, on average, 39.3% of extracted Lightcast skills are present in our current approach. The median percentage is 33.3%.

  • -
  • Qualitatively, there are a number of limitations to the degree of overlap approach for comparison:

  • -
  • The two skill lists may contain very similar skills, e.g. Financial Accounting vs. Finance, which will nevertheless be considered different

  • -
  • For exact comparison, we set the cosine similarity threshold to 0 to guarantee extracted skill-level skills but would otherwise not do so. This allows for inappropriate skill matches, e.g. ‘Eye Examination’ for a supply chain role

  • -
  • Lightcast’s algorithm may not be a single source of truth and it also extracts inappropriate skill matches, e.g. ‘Flooring’ for a care assistant role

  • -
-
-
-

Evaluation 1 - Manual judgement of false positive rate#

-

We looked at the ESCO-mapped skills extracted from a random sample of 64 job adverts, and manually judged how many skills shouldn’t have been extracted from the job advert i.e. the false positives. We also performed this analysis when looking at the skills extracted from 22 job adverts using Lightcast’s Skills Extractor API.

-
    -
  • Our results showed on average 27% of the skills extracted from a job advert are false positives.

  • -
  • For Lightcast, on average 12% of the skills extracted from a job advert are false positives.

  • -
-
-
-

Evaluation 2 - Manual judgement of skills extraction and mapping quality#

-

We manually tagged a random sample of skills extracted from job adverts, with whether we thought they were inappropriate, OK or excellent skill entities, and whether we thought they had inappropriate, OK or excellent matches to ESCO skills.

-
    -
  • We felt that out of 183 skill entities 73% were excellent entities, 19% were OK and 8% were inappropriate.

  • -
  • 172 out of 183 skill entities were matched to ESCO skills.

  • -
  • Of the 172 matched skill entities we felt 53% were excellently matched, 30% were OK and 17% were inappropriate.

  • -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Skill entity quality

ESCO match quality

count

Inappropriate

Inappropriate

9

Inappropriate

OK

1

OK

Inappropriate

9

OK

OK

16

OK

Excellent

7

Excellent

Inappropriate

11

Excellent

OK

35

Excellent

Excellent

83

-
-
    -
  • 87% of the matches were to either an individual skill or the lowest level of the skills taxonomy (level 3).

  • -
  • The match quality is at its best when the skill entity is matched to an individual ESCO skill.

  • -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Taxonomy level mapped to

Number in sample

Average match quality score (0-inappropriate, 1-OK, 2-excellent)

Skill

99

1.71

Skill hierarchy level 3

51

0.90

Attitudes hierarchy

8

1.63

Skill hierarchy level 2

6

0.33

Knowledge hierarchy

6

0.17

Transversal hierarchy

1

1.00

-
-
-
-
- -
-
-
- -
-
- -
-
+
+ - - \ No newline at end of file + + diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index c69ac25d..b38bbfa4 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1,1121 @@ -Search.setIndex({"docnames": ["about", "index"], "filenames": ["about.md", "index.rst"], "titles": ["Skills Extractor", "<no title>"], "terms": {"document": 0, "thi": 0, "page": 0, "contain": 0, "inform": 0, "how": 0, "The": [0, 1], "allow": 0, "you": 0, "phrase": 0, "from": 0, "job": 0, "advertis": 0, "text": 0, "them": 0, "onto": 0, "taxonomi": [0, 1], "your": [0, 1], "choic": 0, "we": 0, "current": 0, "support": 0, "three": 0, "differ": 0, "european": 0, "commiss": 0, "compet": 0, "occup": [0, 1], "esco": [0, 1], "lightcast": [0, 1], "open": 0, "toi": 0, "intern": 0, "purpos": 0, "If": 0, "d": 0, "like": 0, "learn": 0, "more": 0, "about": 0, "model": [0, 1], "pleas": 0, "refer": 0, "card": [0, 1], "mai": 0, "also": 0, "want": 0, "read": 0, "wider": 0, "our": [0, 1], "introduct": 0, "blog": 0, "interact": 0, "can": 0, "pip": 0, "ojd": 0, "dap": 0, "need": 0, "download": 0, "spaci": 0, "en_core_web_sm": 0, "python": 0, "m": 0, "when": 0, "packag": 0, "i": 0, "first": 0, "automat": 0, "folder": 0, "neccessari": 0, "data": 0, "file": [0, 1], "1gb": 0, "although": 0, "don": 0, "t": 0, "have": 0, "credenti": 0, "work": 0, "kei": 0, "function": 0, "advert": 0, "list": 0, "option": 0, "local": 0, "fals": [0, 1], "onli": 0, "those": 0, "access": 0, "s3": 0, "bucket": 0, "would": 0, "one": 0, "step": 0, "ar": 0, "abl": 0, "do": 0, "so": 0, "extract_skil": [0, 1], "method": 0, "ojd_daps_skil": 0, "pipelin": [0, 1], "import": 0, "extractskil": [0, 1], "modul": 0, "e": 0, "config_nam": 0, "extract_skills_toi": 0, "true": 0, "instanti": 0, "configur": [0, 1], "load": [0, 1], "necessari": 0, "job_advert": 0, "involv": 0, "commun": 0, "math": 0, "excel": 0, "good": 0, "present": 0, "exampl": 0, "job_skills_match": 0, "match": 0, "output": 0, 
"follow": 0, "collabor": 0, "creativ": 0, "s1": 0, "comput": 0, "s5": 0, "simpli": 0, "predicted_skil": 0, "get_skil": [0, 1], "experi": 0, "multiskil": [0, 1], "either": 0, "get_stil": 0, "In": 0, "instanc": 0, "skills_list": 0, "skills_list_match": 0, "map_skil": [0, 1], "format": [0, 1], "techniqu": 0, "cdef": 0, "demo": 0, "front": 0, "end": 0, "built": 0, "streamlit": 0, "given": 0, "past": 0, "ani": 0, "extract_skills_lightcast": 0, "extract_skills_esco": 0, "modifi": 0, "sourc": 0, "code": 0, "clone": 0, "run": 0, "git": 0, "github": 0, "com": 0, "nestauk": 0, "meet": 0, "scienc": 0, "cookiecutt": 0, "requir": 0, "brief": 0, "direnv": 0, "conda": 0, "creat": 0, "blank": 0, "log": 0, "mkdir": 0, "state": 0, "touch": 0, "make": 0, "environ": 0, "zip": 0, "click": 0, "here": 0, "after": 0, "unzip": 0, "move": 0, "parent": 0, "split": 0, "core": 0, "skill_ner": 0, "train": [0, 1], "entiti": [0, 1], "recognit": [0, 1], "ner": [0, 1], "skill_ner_map": 0, "an": 0, "exist": 0, "semant": 0, "similar": 0, "user": 0, "friendli": 0, "much": 0, "found": 0, "each": 0, "readm": 0, "some": 0, "check": 0, "pytest": 0, "variou": 0, "piec": 0, "done": 0, "These": 0, "dataset": [0, 1], "privat": 0, "therefor": 0, "design": 0, "technic": 0, "style": 0, "wa": 0, "made": 0, "possibl": 0, "via": 0, "fund": 0, "econom": 0, "statist": 0, "centr": 0, "templat": 0, "base": 0, "doc": 0, "skill": 1, "extractor": 1, "welcom": 1, "nesta": 1, "": 1, "librari": 1, "instal": 1, "aw": 1, "cli": 1, "tl": 1, "dr": 1, "us": 1, "1": 1, "extract": 1, "AND": 1, "map": 1, "2": 1, "3": 1, "app": 1, "develop": 1, "setup": 1, "project": 1, "structur": 1, "test": 1, "analysi": 1, "contributor": 1, "guidelin": 1, "custom": 1, "usag": 1, "predefin": 1, "definit": 1, "own": 1, "defin": 1, "summari": 1, "metric": 1, "intend": 1, "out": 1, "scope": 1, "comparison": 1, "top": 1, "group": 1, "per": 1, "essenti": 1, "degre": 1, "overlap": 1, "between": 1, "evalu": 1, "manual": 1, "judgement": 1, "posit": 1, 
"rate": 1, "qualiti": 1, "name": 1, "caveat": 1, "recommend": 1, "factor": 1, "label": 1, "class": 1, "format_skil": 1, "mit": 1, "licens": 1}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"skill": 0, "extractor": 0, "welcom": 0, "nesta": 0, "": 0, "librari": 0, "instal": 0, "name": 0, "aw": 0, "cli": 0, "tl": 0, "dr": 0, "us": 0, "usag": 0, "1": 0, "extract": 0, "AND": 0, "map": 0, "2": 0, "3": 0, "app": 0, "develop": 0, "setup": 0, "project": 0, "structur": 0, "test": 0, "analysi": 0, "contributor": 0, "guidelin": 0}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Skills Extractor": [[0, "skills-extractor"]], "Welcome to Nesta\u2019s Skills Extractor Library": [[0, "welcome-to-nesta-s-skills-extractor-library"]], "Installation ": [[0, "installation-a-name-installation-a"]], "AWS CLI": [[0, "aws-cli"]], "TL;DR: Using Nesta\u2019s Skills Extractor library ": [[0, "tl-dr-using-nesta-s-skills-extractor-library-a-name-usage-a"]], "1. Extract AND map skills": [[0, "extract-and-map-skills"]], "2. Extract skills": [[0, "extract-skills"]], "3. 
Map skills": [[0, "map-skills"]], "App": [[0, "app"]], "Development ": [[0, "development-a-name-development-a"]], "Setup": [[0, "setup"]], "Project structure": [[0, "project-structure"]], "Testing": [[0, "testing"]], "Analysis": [[0, "analysis"]], "Contributor guidelines": [[0, "contributor-guidelines"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({ + docnames: [ + "about", + "custom_usage", + "extract_skills", + "index", + "labelling", + "license", + "model_card", + "pipeline_summary", + ], + filenames: [ + "about.md", + "custom_usage.md", + "extract_skills.rst", + "index.rst", + "labelling.md", + "license.md", + "model_card.md", + "pipeline_summary.md", + ], + titles: [ + "Skills Extractor", + "Custom Usage", + 'The ExtractSkills class', + "<no title>", + "Entity Labelling", + "The MIT License (MIT)", + "Model Cards", + "Pipeline summary and metrics", + ], + terms: { + document: [0, 1, 5], + thi: [0, 1, 2, 4, 5, 6, 7], + page: [0, 1, 6], + contain: [0, 1, 2, 6, 7], + inform: [0, 1, 6, 7], + how: [0, 7], + The: [0, 1, 3, 4, 6, 7], + allow: [0, 7], + you: [0, 1, 2, 6], + phrase: 0, + from: [0, 1, 2, 4, 5, 6, 7], + job: [0, 1, 2, 4, 6, 7], + advertis: 0, + text: [0, 1, 2, 6, 7], + them: [0, 1, 2, 4, 6], + onto: [0, 1, 7], + taxonomi: [0, 2, 3, 7], + your: [0, 3], + choic: [0, 1, 6, 7], + we: [0, 1, 4, 6, 7], + current: [0, 1, 4, 6, 7], + support: [0, 1, 7], + three: [0, 1], + differ: [0, 6, 7], + european: [0, 7], + commiss: [0, 7], + compet: [0, 7], + occup: [0, 3], + esco: [0, 1, 3, 6], + lightcast: [0, 1, 3], + open: [0, 1, 7], + toi: [0, 1], + intern: 0, + purpos: [0, 5], + If: [0, 1, 2, 6], + d: 0, + like: [0, 1, 4], + learn: [0, 6], + more: [0, 1, 4, 6, 7], + about: [0, 1, 4, 6], + model: [0, 1, 2, 3, 4, 7], + pleas: [0, 1, 7], + refer: [0, 1], + card: [0, 1, 3], + mai: [0, 1, 7], + also: [0, 1, 2, 7], + want: [0, 1, 2], + read: [0, 6], + wider: 0, + our: [0, 3, 4], + introduct: 0, + blog: 0, + interact: 0, + can: [0, 1, 2, 4, 6], + 
pip: 0, + ojd: 0, + dap: 0, + need: [0, 1, 2, 4, 6], + download: [0, 2], + spaci: 6, + en_core_web_sm: [], + python: 6, + m: [], + when: [0, 1, 2, 6, 7], + packag: [0, 1], + i: [0, 1, 2, 4, 5, 6, 7], + first: [0, 4, 6], + automat: 0, + folder: [0, 1], + neccessari: 0, + data: [0, 1, 2, 4, 6], + file: [0, 2, 3, 5, 6], + "1gb": 0, + although: 1, + don: 1, + t: [1, 4, 7], + have: [0, 1, 7], + credenti: [], + work: [0, 4, 6, 7], + kei: [0, 2], + function: [0, 1, 2], + advert: [0, 1, 2, 4, 6, 7], + list: [0, 1, 2, 7], + option: [0, 1, 2], + local: [0, 1, 2], + fals: [0, 2, 3], + onli: [0, 7], + those: 0, + access: [0, 2], + s3: [0, 2], + bucket: [0, 2], + would: [0, 7], + one: [0, 2, 6], + step: [0, 6], + ar: [0, 1, 2, 4, 6, 7], + abl: [0, 1], + do: [0, 1, 5, 6, 7], + so: [0, 1, 5, 7], + extract_skil: [0, 1, 2, 3], + method: 0, + ojd_daps_skil: [0, 1, 2], + pipelin: [0, 1, 2, 3, 4, 6], + import: [0, 1, 6], + extractskil: [0, 1, 3], + modul: [0, 1], + e: [0, 1, 4, 6, 7], + config_nam: [0, 1, 2], + extract_skills_toi: [0, 1, 2], + true: [0, 1, 2], + instanti: [0, 1], + configur: [0, 3, 6], + load: [0, 1, 2, 3], + necessari: [0, 2], + job_advert: [0, 2], + involv: 0, + commun: [0, 1], + math: 0, + excel: [0, 2, 6, 7], + good: [0, 2, 6], + present: [0, 1, 7], + exampl: [0, 1, 4, 6], + job_skills_match: 0, + match: [0, 1, 2, 6, 7], + output: [0, 2, 6], + follow: [0, 1, 4, 5, 6], + collabor: [0, 1], + creativ: [0, 1], + s1: [0, 1], + comput: 0, + s5: [0, 1], + simpli: 0, + predicted_skil: [0, 2], + get_skil: [0, 2, 3], + experi: [0, 2, 4, 6, 7], + multiskil: [0, 2, 3, 4, 7], + either: [0, 2, 7], + get_stil: 0, + In: [0, 1, 4], + instanc: 0, + skills_list: 0, + skills_list_match: 0, + map_skil: [0, 2, 3], + format: [0, 2, 3], + techniqu: [0, 1], + cdef: [0, 1], + demo: 0, + front: 0, + end: [0, 4], + built: 0, + streamlit: 0, + given: [0, 1, 4, 7], + past: 0, + ani: [0, 2, 5, 7], + extract_skills_lightcast: [0, 1], + extract_skills_esco: [0, 1], + modifi: [0, 5], + sourc: [0, 
2, 7], + code: [0, 1, 2, 6], + clone: [0, 1], + run: [0, 1, 2], + git: 0, + github: 0, + com: 0, + nestauk: 0, + meet: 0, + scienc: 0, + cookiecutt: 0, + requir: 0, + brief: 0, + direnv: 0, + conda: 0, + creat: [0, 1, 4], + blank: [0, 1], + log: [0, 2], + mkdir: 0, + state: 0, + touch: 0, + make: [0, 4], + environ: 0, + zip: [], + click: [], + here: [0, 1, 6], + after: [], + unzip: [], + move: [], + parent: [], + split: [0, 1, 2, 6, 7], + core: [0, 1], + skill_ner: [0, 1, 4], + train: [0, 1, 2, 3, 7], + entiti: [0, 2, 3, 7], + recognit: [0, 3, 4, 7], + ner: [0, 1, 2, 3, 4, 7], + skill_ner_map: [0, 1], + an: [0, 4, 5, 6, 7], + exist: [0, 1, 2], + semant: [0, 6], + similar: [0, 6, 7], + user: [0, 4, 7], + friendli: 0, + much: 0, + found: [0, 2, 4, 6, 7], + each: [0, 1, 2, 4], + readm: 0, + some: [0, 4], + check: 0, + pytest: 0, + variou: [0, 1], + piec: 0, + done: 0, + These: [0, 1, 6], + dataset: [0, 2, 3, 7], + privat: [0, 2], + therefor: 0, + design: 0, + technic: 0, + style: 0, + wa: [0, 6], + made: 0, + possibl: [0, 1, 2, 6], + via: [0, 2], + fund: 0, + econom: 0, + statist: [0, 7], + centr: 0, + templat: [0, 1], + base: [0, 6], + doc: 0, + skill: [1, 2, 3, 4], + extractor: [3, 7], + welcom: 3, + nesta: [2, 3, 5, 6], + "": [1, 2, 3, 6], + librari: [3, 6], + instal: 3, + aw: [], + cli: [], + tl: 3, + dr: 3, + us: [1, 2, 3, 4, 5, 6], + 1: [1, 2, 3, 6], + extract: [1, 2, 3, 4, 6], + AND: [3, 5], + map: [2, 3], + 2: [1, 3, 6], + 3: [1, 3, 4, 7], + app: 3, + develop: [1, 3, 6], + setup: [1, 3], + project: [1, 3], + structur: 3, + test: [1, 3, 4, 6], + analysi: [3, 7], + contributor: 3, + guidelin: 3, + custom: 3, + usag: 3, + predefin: [3, 6], + definit: [3, 6], + own: 3, + defin: [3, 4], + summari: 3, + metric: 3, + intend: 3, + out: [3, 4, 5, 6], + scope: 3, + comparison: 3, + top: [1, 3], + group: [1, 3, 6], + per: [3, 6], + essenti: 3, + degre: 3, + overlap: 3, + between: [3, 6], + evalu: [3, 4, 6], + manual: [3, 6], + judgement: 3, + posit: 3, + rate: 3, + 
qualiti: 3, + name: [2, 3, 4, 7], + caveat: 3, + recommend: 3, + factor: 3, + label: [1, 2, 3, 6], + class: [1, 3], + format_skil: [2, 3], + mit: 3, + licens: 3, + note: 0, + maco: 0, + ubuntu: 0, + chang: [0, 6], + been: [0, 7], + compat: 0, + window: 0, + system: 0, + cannot: 0, + guarante: [0, 7], + contribut: 0, + push: 0, + new: [0, 7], + branch: 0, + order: [0, 1, 2], + trigger: 0, + py: 1, + combin: [1, 2], + predict: [1, 2, 4, 6, 7], + explain: 1, + includ: [1, 4, 5, 7], + config: [1, 2, 6], + anoth: 1, + To: [1, 4], + repo: 1, + main: [1, 6], + instruct: 1, + particular: [1, 5], + There: [1, 4, 7], + avail: 1, + algorithm: [1, 6, 7], + paramet: [1, 2], + valu: 1, + directori: 1, + locat: 1, + store: 1, + correct: 1, + v1: 1, + version: 1, + 22: [1, 7], + 11: [1, 6, 7], + yaml: 1, + review: 1, + 100: [1, 6, 7], + ultim: 1, + hard: [1, 2, 6], + 43: [1, 6], + most: [1, 6], + common: 1, + which: [1, 4, 7], + were: [1, 4, 6, 7], + well: [1, 6, 7], + random: [1, 4, 7], + sampl: [1, 4, 6, 7], + "000": [1, 6, 7], + observatori: 1, + appropri: 1, + everi: [1, 2, 6], + descript: 1, + ner_model_path: 1, + str: [1, 2], + rel: [1, 2], + path: [1, 2], + span: [1, 4, 6], + taxonomy_nam: 1, + taxonomy_path: 1, + must: 1, + csv: 1, + clean_job_ad: 1, + bool: [1, 2], + default: [1, 2], + whether: [1, 2, 5, 6, 7], + perform: [1, 6, 7], + light: 1, + clean: 1, + detect: 1, + camelcas: 1, + replac: 1, + charact: 1, + convert: [1, 2], + bullet: 1, + point: 1, + full: 1, + stop: 1, + min_multiskill_length: 1, + int: 1, + minimum: [1, 7], + length: [1, 2, 6], + multi: [1, 2, 4, 6, 7], + sentenc: [1, 4, 6, 7], + appli: [1, 2, 7], + rule: [1, 6, 7], + taxonomy_embedding_file_nam: [1, 2], + embed: [1, 2, 6, 7], + left: [1, 2], + unset: [1, 2], + gener: [1, 2, 6], + prev_skill_matches_file_nam: [1, 2], + previou: [1, 2], + hard_labelled_skills_file_nam: 1, + hier_name_mapper_file_nam: [1, 2], + hierarchi: [1, 2, 7], + mapper: [1, 2], + num_hier_level: 1, + number: [1, 2, 6, 7], + 
level: [1, 6, 7], + set: [1, 2, 4, 6, 7], + 0: [1, 6, 7], + ha: 1, + skill_type_dict: 1, + dict: [1, 2], + A: [1, 2, 5, 6], + dictionari: [1, 2], + type: [1, 2], + skill_typ: 1, + column: 1, + hier_typ: 1, + least: [1, 7], + granular: [1, 6], + match_thresholds_dict: 1, + threshold: [1, 6, 7], + For: [1, 4, 6, 7], + skill_match_thresh: 1, + 7: [1, 6, 7], + top_tax_skil: 1, + 5: [1, 7], + max_shar: 1, + see: [1, 6], + detail: [1, 6], + what: 1, + repres: 1, + skill_name_col: 1, + skill_id_col: 1, + id: 1, + row: 1, + should: [1, 6, 7], + uniqu: 1, + skill_hier_info_col: 1, + info: 1, + na: 1, + skill_type_col: 1, + hier: 1, + categori: 1, + subcategori: 1, + wai: [1, 2, 7], + look: [1, 4, 6, 7], + spreadsheet: 1, + softwar: [1, 5], + abcd: 1, + 6: [1, 7], + skill_group_3: 1, + nan: 1, + mathemat: 1, + skill_group_2: 1, + 4: 1, + becaus: 1, + try: [1, 6], + individu: [1, 2, 4, 7], + isn: 1, + correspond: 1, + rather: [1, 2], + than: [1, 2, 7], + show: [1, 7], + all: [1, 2, 5, 6, 7], + part: [1, 4, 6], + where: [1, 2, 6, 7], + situat: [1, 4], + It: [1, 2], + help: 1, + link: 1, + g: [1, 4, 6], + sinc: 1, + content: 1, + string: [1, 2, 6], + both: [1, 2, 6, 7], + provid: [1, 5, 6], + guidanc: 1, + re: 1, + script: 1, + extract_skills_taxonomy_nam: 1, + describ: 1, + save: 1, + your_current_path: 1, + against: 1, + second: 1, + third: 1, + level_2: 1, + level_3: 1, + now: 1, + my_custom_config_nam: 1, + verbos: 2, + multi_process: 2, + config_path: 2, + thei: [2, 7], + public: 2, + limit: [2, 5, 7], + messag: 2, + debug: 2, + multiprocess: 2, + none: 2, + hard_labelled_skills_nam: 2, + previous: 2, + jobner: 2, + skillmapp: 2, + job_adverts_skil: 2, + union: 2, + take: [2, 6], + input: [2, 7], + correctli: 2, + return: 2, + togeth: 2, + skill_ent: 2, + taxonomy_skill_nam: 2, + taxonomy_skill_id: 2, + up: 2, + form: [2, 4], + microsoft: 2, + equal: 2, + accordingli: 2, + same: [2, 6], + assum: 2, + treat: 2, + singl: [2, 4, 7], + arrang: 2, + took: 4, + approach: [4, 6, 
7], + got: 4, + studio: 4, + process: 4, + interfac: 4, + task: 4, + tri: 4, + best: [4, 7], + start: 4, + verb: 4, + sometim: 4, + wasn: 4, + easi: 4, + earlier: 4, + might: [4, 6], + later: 4, + team: 4, + basi: 4, + could: [4, 6], + sens: 4, + without: [4, 5, 7], + word: [4, 6], + whole: 4, + often: 4, + insur: 4, + qualif: 4, + electr: 4, + 5641: [4, 6], + 375: [4, 6], + 354: [4, 6], + 4696: [4, 6], + 608: [4, 6], + 20: [4, 6], + held: [4, 6], + copyright: 5, + c: 5, + 2022: [5, 6], + permiss: 5, + herebi: 5, + grant: 5, + free: 5, + charg: 5, + person: 5, + obtain: 5, + copi: 5, + associ: 5, + deal: 5, + restrict: 5, + right: 5, + merg: 5, + publish: 5, + distribut: 5, + sublicens: 5, + sell: 5, + permit: 5, + whom: 5, + furnish: 5, + subject: 5, + condit: 5, + abov: [5, 6], + notic: 5, + shall: 5, + substanti: 5, + portion: 5, + THE: 5, + AS: 5, + warranti: 5, + OF: 5, + kind: 5, + express: 5, + OR: 5, + impli: 5, + BUT: 5, + NOT: 5, + TO: 5, + merchant: 5, + fit: 5, + FOR: 5, + noninfring: 5, + IN: 5, + NO: 5, + event: 5, + author: 5, + holder: 5, + BE: 5, + liabl: 5, + claim: 5, + damag: 5, + other: [5, 7], + liabil: 5, + action: 5, + contract: 5, + tort: 5, + otherwis: [5, 7], + aris: 5, + connect: [5, 6], + WITH: 5, + two: [6, 7], + scientist: 6, + analyt: 6, + practic: [6, 7], + last: 6, + updat: 6, + 23: 6, + compon: 6, + scikit: 6, + svm: [6, 7], + featur: 6, + As: 6, + "15th": 6, + novemb: 6, + ojo: 6, + neural: 6, + network: 6, + architectur: 6, + Their: 6, + sophist: 6, + strategi: 6, + subword: 6, + bloom: 6, + deep: 6, + convolut: 6, + residu: 6, + novel: 6, + transit: 6, + pars: 6, + creation: 6, + nerevalu: 6, + calcul: [6, 7], + f1: 6, + precis: 6, + recal: 6, + classifi: 6, + result: [6, 7], + 586: 6, + 679: 6, + 515: 6, + 506: 6, + 648: 6, + 416: 6, + 563: 6, + 643: 6, + 500: 6, + partial: 6, + across: 6, + ner_model: 6, + 20220825: 6, + train_detail: 6, + json: 6, + On: [6, 7], + achiev: 6, + 91: 6, + accuraci: 6, + splitter: 6, + 253: 6, + 
ok: [6, 7], + bad: 6, + Of: [6, 7], + 80: 6, + 66: 6, + 9: [6, 7], + 25: 6, + mani: [6, 7], + get: 6, + unabl: 6, + still: 6, + futur: 6, + add: 6, + dedupl: [6, 7], + mean: 6, + mention: 6, + twice: 6, + just: 6, + onc: 6, + howev: [6, 7], + slightli: 6, + occurr: 6, + specif: 6, + disambigu: 6, + acronym: 6, + improv: 6, + methodologi: 6, + cosin: [6, 7], + huggingfac: [6, 7], + transform: [6, 7], + minilm: [6, 7], + l6: [6, 7], + v2: [6, 7], + close: 6, + assign: 6, + maximum: 6, + share: 6, + diagram: 6, + commonli: 6, + occur: 6, + doe: 6, + less: 6, + metaphor: 6, + understand: [6, 7], + bigger: 6, + pictur: 6, + draft: 6, + orsa: 6, + fine: 6, + art: 6, + poor: 6, + assist: [6, 7], + peopl: 6, + ensur: 6, + highest: 6, + unmatch: 6, + under: 6, + identifi: [6, 7], + approxim: 6, + relev: 6, + tune: 6, + high: 7, + overal: 7, + vector: 7, + machin: 7, + emb: 7, + pre: 7, + further: 7, + feedback: 7, + contact: 7, + liz: 7, + gallagh: 7, + india: 7, + kerl: 7, + cath: 7, + sleeman: 7, + onlin: 7, + research: 7, + labour: 7, + relat: 7, + govern: 7, + bodi: 7, + non: 7, + english: 7, + languag: 7, + draw: 7, + conclus: 7, + unidentifi: 7, + determin: 7, + demand: 7, + expert: 7, + steer: 7, + nor: 7, + discriminatori: 7, + hire: 7, + exact: 7, + sever: 7, + proxi: 7, + better: 7, + compar: 7, + 75: 7, + percentil: 7, + count: 7, + At: 7, + find: 7, + 58: 7, + averag: 7, + 345: 7, + 54: 7, + transvers: 7, + addit: 7, + 94: 7, + percent: 7, + median: 7, + 97: 7, + 84: 7, + 99: 7, + 10: 7, + while: 7, + 42: 7, + 40: 7, + 39: 7, + percentag: 7, + 33: 7, + qualit: 7, + veri: 7, + financi: 7, + account: 7, + v: 7, + financ: 7, + consid: 7, + inappropri: 7, + ey: 7, + examin: 7, + suppli: 7, + chain: 7, + role: 7, + truth: 7, + floor: 7, + care: 7, + 64: 7, + judg: 7, + shouldn: 7, + api: 7, + 27: 7, + 12: 7, + tag: 7, + thought: 7, + had: 7, + felt: 7, + 183: 7, + 73: 7, + 19: 7, + 8: 7, + 172: 7, + 53: 7, + 30: 7, + 17: 7, + 16: 7, + 35: 7, + 83: 7, + 87: 7, + 
lowest: 7, + its: 7, + score: 7, + 71: 7, + 51: 7, + 90: 7, + attitud: 7, + 63: 7, + knoweldg: 7, + "00": 7, + }, + objects: { + "ojd_daps_skills.pipeline.extract_skills.extract_skills": [ + [2, 0, 1, "", "ExtractSkills"], + ], + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills": [ + [2, 1, 1, "", "extract_skills"], + [2, 1, 1, "", "format_skills"], + [2, 1, 1, "", "get_skills"], + [2, 1, 1, "", "load"], + [2, 1, 1, "", "map_skills"], + ], + }, + objtypes: { 0: "py:class", 1: "py:method" }, + objnames: { + 0: ["py", "class", "Python class"], + 1: ["py", "method", "Python method"], + }, + titleterms: { + skill: [0, 6, 7], + extractor: 0, + welcom: 0, + nesta: 0, + "": [0, 7], + librari: 0, + instal: 0, + name: [0, 1, 6], + aw: [], + cli: [], + tl: 0, + dr: 0, + us: [0, 7], + usag: [0, 1], + 1: [0, 7], + extract: [0, 7], + AND: 0, + map: [0, 1, 6, 7], + 2: [0, 7], + 3: 0, + app: 0, + develop: 0, + setup: 0, + project: 0, + structur: 0, + test: 0, + analysi: 0, + contributor: 0, + guidelin: 0, + custom: 1, + configur: 1, + file: 1, + config_fil: 1, + predefin: 1, + predefined_config: 1, + definit: 1, + config_def: 1, + your: 1, + own: 1, + taxonomi: [1, 6], + format: 1, + format_tax: 1, + defin: 1, + custom_config: 1, + The: [2, 5], + extractskil: 2, + class: 2, + entiti: [4, 6], + label: 4, + train: [4, 6], + dataset: 4, + mit: 5, + licens: 5, + model: 6, + card: 6, + recognit: 6, + extract_skills_card: 6, + summari: [6, 7], + ner: 6, + metric: [6, 7], + multiskil: 6, + caveat: 6, + recommend: 6, + mapping_card: 6, + factor: 6, + pipelin: 7, + intend: 7, + out: 7, + scope: 7, + comparison: 7, + top: 7, + group: 7, + per: 7, + occup: 7, + esco: 7, + essenti: 7, + degre: 7, + overlap: 7, + between: 7, + lightcast: 7, + our: 7, + evalu: 7, + manual: 7, + judgement: 7, + fals: 7, + posit: 7, + rate: 7, + qualiti: 7, + }, + envversion: { + "sphinx.domains.c": 2, + "sphinx.domains.changeset": 1, + "sphinx.domains.citation": 1, + "sphinx.domains.cpp": 
8, + "sphinx.domains.index": 1, + "sphinx.domains.javascript": 2, + "sphinx.domains.math": 2, + "sphinx.domains.python": 3, + "sphinx.domains.rst": 2, + "sphinx.domains.std": 2, + "sphinx.ext.viewcode": 1, + sphinx: 57, + }, + alltitles: { + "Skills Extractor": [[0, "skills-extractor"]], + "Welcome to Nesta\u2019s Skills Extractor Library": [ + [0, "welcome-to-nesta-s-skills-extractor-library"], + ], + 'Installation ': [[0, "installation"]], + 'TL;DR: Using Nesta\u2019s Skills Extractor library ': [ + [0, "tl-dr-using-nesta-s-skills-extractor-library"], + ], + "1. Extract AND map skills": [[0, "extract-and-map-skills"]], + "2. Extract skills": [[0, "extract-skills"]], + "3. Map skills": [[0, "map-skills"]], + App: [[0, "app"]], + 'Development ': [[0, "development"]], + Setup: [[0, "setup"]], + "Project structure": [[0, "project-structure"]], + Testing: [[0, "testing"]], + Analysis: [[0, "analysis"]], + "Contributor guidelines": [[0, "contributor-guidelines"]], + "Custom Usage": [[1, "custom-usage"]], + 'Configuration files ': [ + [1, "configuration-files"], + ], + 'Predefined configurations ': [ + [1, "predefined-configurations"], + ], + 'Configuration definitions ': [ + [1, "configuration-definitions"], + ], + 'Mapping to your own taxonomy ': [ + [1, "mapping-to-your-own-taxonomy"], + ], + 'Format your taxonomy ': [ + [1, "format-your-taxonomy"], + ], + 'Define your own configuration file ': [ + [1, "define-your-own-configuration-file"], + ], + "The ExtractSkills class": [[2, "the-extractskills-class"]], + "Entity Labelling": [[4, "entity-labelling"]], + "Training dataset": [[4, "training-dataset"]], + "The MIT License (MIT)": [[5, "the-mit-license-mit"]], + "Model Cards": [[6, "model-cards"]], + 'Model Card: Named Entity Recognition Model ': [ + [6, "model-card-named-entity-recognition-model"], + ], + Summary: [ + [6, "summary"], + [6, "id1"], + ], + Training: [[6, "training"]], + "NER Metrics": [[6, "ner-metrics"]], + "Multiskill Metrics": [[6, 
"multiskill-metrics"]], + "Caveats and Recommendations": [ + [6, "caveats-and-recommendations"], + [6, "id2"], + ], + 'Model Card: Skills to Taxonomy Mapping ': [ + [6, "model-card-skills-to-taxonomy-mapping"], + ], + "Model Factors": [[6, "model-factors"]], + "Pipeline summary and metrics": [[7, "pipeline-summary-and-metrics"]], + "Intended Use": [[7, "intended-use"]], + "Out of Scope Uses": [[7, "out-of-scope-uses"]], + Metrics: [[7, "metrics"]], + "Comparison 1 - Top skill groups per occupation comparison to ESCO essential skill groups per occupation": [ + [ + 7, + "comparison-1-top-skill-groups-per-occupation-comparison-to-esco-essential-skill-groups-per-occupation", + ], + ], + "Comparison 2 - Degree of overlap between Lightcast\u2019s extracted skills and our Lightcast skills": [ + [ + 7, + "comparison-2-degree-of-overlap-between-lightcasts-extracted-skills-and-our-lightcast-skills", + ], + ], + "Evaluation 1 - Manual judgement of false positive rate": [ + [7, "evaluation-1-manual-judgement-of-false-positive-rate"], + ], + "Evaluation 2 - Manual judgement of skills extraction and mapping quality": [ + [ + 7, + "evaluation-2-manual-judgement-of-skills-extraction-and-mapping-quality", + ], + ], + }, + indexentries: { + "extractskills (class in ojd_daps_skills.pipeline.extract_skills.extract_skills)": [ + [ + 2, + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills", + ], + ], + "extract_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [ + [ + 2, + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.extract_skills", + ], + ], + "format_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [ + [ + 2, + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.format_skills", + ], + ], + "get_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [ + [ + 2, + 
"ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.get_skills", + ], + ], + "load() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [ + [ + 2, + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.load", + ], + ], + "map_skills() (ojd_daps_skills.pipeline.extract_skills.extract_skills.extractskills method)": [ + [ + 2, + "ojd_daps_skills.pipeline.extract_skills.extract_skills.ExtractSkills.map_skills", + ], + ], + }, +}); diff --git a/ojd_daps_skills/app/requirements_app.txt b/ojd_daps_skills/app/requirements_app.txt index d1d944f6..48a229aa 100644 --- a/ojd_daps_skills/app/requirements_app.txt +++ b/ojd_daps_skills/app/requirements_app.txt @@ -1,4 +1,2 @@ -numpy==1.21.1 ojd-daps-skills -awscli==1.27.25 -streamlit== 1.16.0 \ No newline at end of file +streamlit==1.16.0 diff --git a/ojd_daps_skills/getters/download_public_data.py b/ojd_daps_skills/getters/download_public_data.py index a04656bc..9de3e80b 100644 --- a/ojd_daps_skills/getters/download_public_data.py +++ b/ojd_daps_skills/getters/download_public_data.py @@ -1,30 +1,35 @@ from ojd_daps_skills import PUBLIC_DATA_FOLDER_NAME, PROJECT_DIR import os -import platform -import zipfile +import boto3 +from botocore.exceptions import ClientError +from botocore import UNSIGNED +from botocore.config import Config +from zipfile import ZipFile def download(): + """Download public data. 
Expected to run once on first use.""" + s3 = boto3.client( + "s3", region_name="eu-west-1", config=Config(signature_version=UNSIGNED) + ) + + bucket_name = "open-jobs-indicators" + key = f"escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip" public_data_dir = os.path.join(PROJECT_DIR, PUBLIC_DATA_FOLDER_NAME) - if platform.system() == "Windows": - os.system( - f'aws --no-sign-request --region=eu-west-1 s3 cp s3://open-jobs-indicators/escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip "{public_data_dir}.zip"' - ) - with zipfile.ZipFile(f"{public_data_dir}.zip", 'r') as zip_ref: - zip_ref.extractall(f"{PROJECT_DIR}") - zip_ref.close() + try: + s3.download_file(bucket_name, key, f"{public_data_dir}.zip") + + with ZipFile(f"{public_data_dir}.zip", "r") as zip_ref: + zip_ref.extractall(PROJECT_DIR) + os.remove(f"{public_data_dir}.zip") - - return - os.system( - f"aws --no-sign-request --region=eu-west-1 s3 cp s3://open-jobs-indicators/escoe_extension/{PUBLIC_DATA_FOLDER_NAME}.zip {public_data_dir}.zip" - ) - os.system(f"unzip {public_data_dir}.zip -d {PROJECT_DIR}") - os.system(f"rm {public_data_dir}.zip") - + except ClientError as ce: + print(f"Error: {ce}") + except FileNotFoundError as fnfe: + print(f"Error: {fnfe}") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a6281427 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + "setuptools>=64", + "setuptools_scm>=8", +] + +[tool.setuptools_scm] +version_scheme = "release-branch-semver" diff --git a/requirements.txt b/requirements.txt index 02d6ff95..d02fdea5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,16 @@ -numpy==1.22.4 -scipy==1.8.1 +numpy==1.24.4 +scipy==1.10.1 pandas==1.3.5 tqdm==4.64.0 filelock==3.7.1 typer==0.4.1 sh==1.14.2 -transformers==4.20.1 +transformers==4.33.3 sentence-transformers==2.2.2 -scikit-learn==0.23.2 +scikit-learn==1.3.1 spacy==3.4.0 nervaluate==0.1.8 s3fs==2022.5.0 boto3==1.21.21 toolz==0.12.0 
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1.tar.gz diff --git a/requirements_dev.txt b/requirements_dev.txt index e3ec023d..9124b614 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -9,7 +9,6 @@ black Sphinx sphinxcontrib-napoleon sphinx-rtd-theme -awscli==1.27.32 pre-commit pre-commit-hooks spacy==3.4.0 diff --git a/setup.py b/setup.py index d66c0f72..ea57b476 100644 --- a/setup.py +++ b/setup.py @@ -2,17 +2,7 @@ from pathlib import Path from setuptools import find_packages from setuptools import setup - -import os -import platform -import subprocess - - -tag_cmd = "git describe --tags --abbrev=0" -tag_cmd = tag_cmd if platform.system() == "Windows" else f"echo $({tag_cmd})" -tag_version = ( - subprocess.check_output(tag_cmd, shell=True).decode("ascii").replace("\n", "") -) +import setuptools_scm def read_lines(path): @@ -26,21 +16,21 @@ def read_lines(path): setup( name="ojd_daps_skills", - long_description=open(os.path.join(BASE_DIR, "README.md"), encoding="utf-8").read(), + long_description=open(BASE_DIR / "README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", - install_requires=read_lines(os.path.join(BASE_DIR, "requirements.txt")), - extras_require={"dev": read_lines(os.path.join(BASE_DIR, "requirements_dev.txt"))}, + install_requires=read_lines(BASE_DIR / "requirements.txt"), + extras_require={"dev": read_lines(BASE_DIR / "requirements_dev.txt")}, packages=find_packages( exclude=["docs", "ojd_daps_skills/analysis", "ojd_daps_skills/app"] ), - classifiers=['Development Status :: 5 - Production/Stable'], + classifiers=["Development Status :: 5 - Production/Stable"], package_data={ # If any package contains *.yaml files, include them: "": [ "*.yaml", ], }, - version=tag_version, + version=setuptools_scm.get_version(), description="Extract skills from job ads and maps them onto a skills taxonomy of your choice.", 
url="https://github.com/nestauk/ojd_daps_skills", project_urls={