diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..31b4bdc2 --- /dev/null +++ b/404.html @@ -0,0 +1,110 @@ + + + + + + + + Quinn + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • +
  • +
  • +
+
+
+
+
+ + +

404

+ +

Page not found

+ + +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css new file mode 100644 index 00000000..98e1c72a --- /dev/null +++ b/assets/_mkdocstrings.css @@ -0,0 +1,28 @@ + +/* Avoid breaking parameters name, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* For pieces of Markdown rendered in table cells. */ +.doc-contents td p { + margin-top: 0 !important; + margin-bottom: 0 !important; +} + +/* Avoid breaking code headings. */ +.doc-heading code { + white-space: normal; +} + +/* Improve rendering of parameters, returns and exceptions. */ +.doc-contents .field-name { + min-width: 100px; +} +.doc-contents .field-name, .field-body { + border: none !important; + padding: 0 !important; +} +.doc-contents .field-list { + margin: 0 !important; +} diff --git a/css/fonts/Roboto-Slab-Bold.woff b/css/fonts/Roboto-Slab-Bold.woff new file mode 100644 index 00000000..6cb60000 Binary files /dev/null and b/css/fonts/Roboto-Slab-Bold.woff differ diff --git a/css/fonts/Roboto-Slab-Bold.woff2 b/css/fonts/Roboto-Slab-Bold.woff2 new file mode 100644 index 00000000..7059e231 Binary files /dev/null and b/css/fonts/Roboto-Slab-Bold.woff2 differ diff --git a/css/fonts/Roboto-Slab-Regular.woff b/css/fonts/Roboto-Slab-Regular.woff new file mode 100644 index 00000000..f815f63f Binary files /dev/null and b/css/fonts/Roboto-Slab-Regular.woff differ diff --git a/css/fonts/Roboto-Slab-Regular.woff2 b/css/fonts/Roboto-Slab-Regular.woff2 new file mode 100644 index 00000000..f2c76e5b Binary files /dev/null and b/css/fonts/Roboto-Slab-Regular.woff2 differ diff --git a/css/fonts/fontawesome-webfont.eot b/css/fonts/fontawesome-webfont.eot new file mode 100644 index 00000000..e9f60ca9 Binary files /dev/null and b/css/fonts/fontawesome-webfont.eot differ diff --git a/css/fonts/fontawesome-webfont.svg b/css/fonts/fontawesome-webfont.svg new file mode 100644 index 00000000..855c845e --- /dev/null +++ b/css/fonts/fontawesome-webfont.svg @@ -0,0 
+1,2671 @@ + + + + +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/css/fonts/fontawesome-webfont.ttf b/css/fonts/fontawesome-webfont.ttf new file mode 100644 index 00000000..35acda2f Binary files /dev/null and b/css/fonts/fontawesome-webfont.ttf differ diff --git a/css/fonts/fontawesome-webfont.woff b/css/fonts/fontawesome-webfont.woff new file mode 100644 index 00000000..400014a4 Binary files /dev/null and b/css/fonts/fontawesome-webfont.woff differ diff --git 
a/css/fonts/fontawesome-webfont.woff2 b/css/fonts/fontawesome-webfont.woff2 new file mode 100644 index 00000000..4d13fc60 Binary files /dev/null and b/css/fonts/fontawesome-webfont.woff2 differ diff --git a/css/fonts/lato-bold-italic.woff b/css/fonts/lato-bold-italic.woff new file mode 100644 index 00000000..88ad05b9 Binary files /dev/null and b/css/fonts/lato-bold-italic.woff differ diff --git a/css/fonts/lato-bold-italic.woff2 b/css/fonts/lato-bold-italic.woff2 new file mode 100644 index 00000000..c4e3d804 Binary files /dev/null and b/css/fonts/lato-bold-italic.woff2 differ diff --git a/css/fonts/lato-bold.woff b/css/fonts/lato-bold.woff new file mode 100644 index 00000000..c6dff51f Binary files /dev/null and b/css/fonts/lato-bold.woff differ diff --git a/css/fonts/lato-bold.woff2 b/css/fonts/lato-bold.woff2 new file mode 100644 index 00000000..bb195043 Binary files /dev/null and b/css/fonts/lato-bold.woff2 differ diff --git a/css/fonts/lato-normal-italic.woff b/css/fonts/lato-normal-italic.woff new file mode 100644 index 00000000..76114bc0 Binary files /dev/null and b/css/fonts/lato-normal-italic.woff differ diff --git a/css/fonts/lato-normal-italic.woff2 b/css/fonts/lato-normal-italic.woff2 new file mode 100644 index 00000000..3404f37e Binary files /dev/null and b/css/fonts/lato-normal-italic.woff2 differ diff --git a/css/fonts/lato-normal.woff b/css/fonts/lato-normal.woff new file mode 100644 index 00000000..ae1307ff Binary files /dev/null and b/css/fonts/lato-normal.woff differ diff --git a/css/fonts/lato-normal.woff2 b/css/fonts/lato-normal.woff2 new file mode 100644 index 00000000..3bf98433 Binary files /dev/null and b/css/fonts/lato-normal.woff2 differ diff --git a/css/theme.css b/css/theme.css new file mode 100644 index 00000000..ad773009 --- /dev/null +++ b/css/theme.css @@ -0,0 +1,13 @@ +/* + * This file is copied from the upstream ReadTheDocs Sphinx + * theme. To aid upgradability this file should *not* be edited. 
+ * modifications we need should be included in theme_extra.css. + * + * https://github.com/readthedocs/sphinx_rtd_theme + */ + + /* sphinx_rtd_theme version 1.2.0 | MIT license */ +html{box-sizing:border-box}*,:after,:before{box-sizing:inherit}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden],audio:not([controls]){display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:active,a:hover{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;text-decoration:none}ins,mark{color:#000}mark{background:#ff0;font-style:italic;font-weight:700}.rst-content code,.rst-content tt,code,kbd,pre,samp{font-family:monospace,serif;_font-family:courier new,monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:after,q:before{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}dl,ol,ul{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure,form{margin:0}label{cursor:pointer}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type=button],input[type=reset],input[type=submit]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}textarea{resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 
0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none!important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{body,html,section{background:none!important}*{box-shadow:none!important;text-shadow:none!important;filter:none!important;-ms-filter:none!important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="#"]:after,a[href^="javascript:"]:after{content:""}blockquote,pre{page-break-inside:avoid}thead{display:table-header-group}img,tr{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}.rst-content .toctree-wrapper>p.caption,h2,h3,p{orphans:3;widows:3}.rst-content .toctree-wrapper>p.caption,h2,h3{page-break-after:avoid}}.btn,.fa:before,.icon:before,.rst-content .admonition,.rst-content .admonition-title:before,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .code-block-caption .headerlink:before,.rst-content .danger,.rst-content .eqno .headerlink:before,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content 
table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-alert,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before,input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week],select,textarea{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}/*! + * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:FontAwesome;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713);src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix&v=4.7.0) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#fontawesomeregular) format("svg");font-weight:400;font-style:normal}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 
.headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa-pull-left.icon,.fa.fa-pull-left,.rst-content .code-block-caption .fa-pull-left.headerlink,.rst-content .eqno .fa-pull-left.headerlink,.rst-content .fa-pull-left.admonition-title,.rst-content code.download span.fa-pull-left:first-child,.rst-content dl dt .fa-pull-left.headerlink,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content p .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download span.fa-pull-left:first-child,.wy-menu-vertical li.current>a button.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-left.toctree-expand,.wy-menu-vertical li 
button.fa-pull-left.toctree-expand{margin-right:.3em}.fa-pull-right.icon,.fa.fa-pull-right,.rst-content .code-block-caption .fa-pull-right.headerlink,.rst-content .eqno .fa-pull-right.headerlink,.rst-content .fa-pull-right.admonition-title,.rst-content code.download span.fa-pull-right:first-child,.rst-content dl dt .fa-pull-right.headerlink,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content h5 .fa-pull-right.headerlink,.rst-content h6 .fa-pull-right.headerlink,.rst-content p .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.wy-menu-vertical li.current>a button.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-right.toctree-expand,.wy-menu-vertical li button.fa-pull-right.toctree-expand{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.pull-left.icon,.rst-content .code-block-caption .pull-left.headerlink,.rst-content .eqno .pull-left.headerlink,.rst-content .pull-left.admonition-title,.rst-content code.download span.pull-left:first-child,.rst-content dl dt .pull-left.headerlink,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content p .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.wy-menu-vertical li.current>a button.pull-left.toctree-expand,.wy-menu-vertical li.on a button.pull-left.toctree-expand,.wy-menu-vertical li button.pull-left.toctree-expand{margin-right:.3em}.fa.pull-right,.pull-right.icon,.rst-content .code-block-caption .pull-right.headerlink,.rst-content .eqno .pull-right.headerlink,.rst-content .pull-right.admonition-title,.rst-content 
code.download span.pull-right:first-child,.rst-content dl dt .pull-right.headerlink,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content p .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.wy-menu-vertical li.current>a button.pull-right.toctree-expand,.wy-menu-vertical li.on a button.pull-right.toctree-expand,.wy-menu-vertical li button.pull-right.toctree-expand{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);-ms-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scaleY(-1);-ms-transform:scaleY(-1);transform:scaleY(-1)}:root 
.fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-close:before,.fa-remove:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-cog:before,.fa-gear:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content code.download span:first-child:before,.rst-content tt.download 
span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-repeat:before,.fa-rotate-right:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content
:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.rst-content .admonition-title:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-exclamation-triangle:before,.fa-warning:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-cogs:before,.fa-gears:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before
{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-floppy-o:before,.fa-save:before{content:""}.fa-square:before{content:""}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magi
c:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.icon-caret-down:before,.wy-dropdown .caret:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-sort:before,.fa-unsorted:before{content:""}.fa-sort-desc:before,.fa-sort-down:before{content:""}.fa-sort-asc:before,.fa-sort-up:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-gavel:before,.fa-legal:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-bolt:before,.fa-flash:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-clipboard:before,.fa-paste:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-l
aptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-chain-broken:before,.fa-unlink:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-ellipsis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:
""}.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:""}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:""}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:""}.fa-eur:before,.fa-euro:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-inr:before,.fa-rupee:before{content:""}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:""}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:""}.fa-krw:before,.fa-won:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-a
pple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-try:before,.fa-turkish-lira:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li button.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-bank:before,.fa-institution:before,.fa-university:before{content:""}.fa-graduation-cap:before,.fa-mortar-board:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-squa
re:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:""}.fa-file-archive-o:before,.fa-file-zip-o:before{content:""}.fa-file-audio-o:before,.fa-file-sound-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before,.fa-resistance:before{content:""}.fa-empire:before,.fa-ge:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before,.fa-y-combinator-square:before,.fa-yc-square:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-paper-plane:before,.fa-send:before{content:""}.fa-paper-plane-o:before,.fa-send-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-futbol-o:before,.fa-soccer-ball-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{conten
t:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.fa-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-ils:before,.fa-shekel:before,.fa-sheqel:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-m
ars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-bed:before,.fa-hotel:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-y-combinator:before,.fa-yc:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery-full:before,.fa-battery:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-paper-o:before,.fa-hand-stop-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-registered:before{content:""}.fa-creative-commons
:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-tripadvisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-television:before,.fa-tv:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{conten
t:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-american-sign-language-interpreting:before,.fa-asl-interpreting:before{content:""}.fa-deaf:before,.fa-deafness:before,.fa-hard-of-hearing:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-sign-language:before,.fa-signing:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-address-card:before,.fa-vcard:before{content:""}.fa-address-card-o:before,.fa-vcard-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:before{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{content:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:before,.fa-thermometer-full:before,.fa-thermometer:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:befo
re{content:""}.fa-bath:before,.fa-bathtub:before,.fa-s15:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{font-family:inherit}.fa:before,.icon:before,.rst-content .admonition-title:before,.rst-content .code-block-caption 
.headerlink:before,.rst-content .eqno .headerlink:before,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before{font-family:FontAwesome;display:inline-block;font-style:normal;font-weight:400;line-height:1;text-decoration:inherit}.rst-content .code-block-caption a .headerlink,.rst-content .eqno a .headerlink,.rst-content a .admonition-title,.rst-content code.download a span:first-child,.rst-content dl dt a .headerlink,.rst-content h1 a .headerlink,.rst-content h2 a .headerlink,.rst-content h3 a .headerlink,.rst-content h4 a .headerlink,.rst-content h5 a .headerlink,.rst-content h6 a .headerlink,.rst-content p.caption a .headerlink,.rst-content p a .headerlink,.rst-content table>caption a .headerlink,.rst-content tt.download a span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li a button.toctree-expand,a .fa,a .icon,a .rst-content .admonition-title,a .rst-content .code-block-caption .headerlink,a .rst-content .eqno .headerlink,a .rst-content code.download span:first-child,a .rst-content dl dt .headerlink,a 
.rst-content h1 .headerlink,a .rst-content h2 .headerlink,a .rst-content h3 .headerlink,a .rst-content h4 .headerlink,a .rst-content h5 .headerlink,a .rst-content h6 .headerlink,a .rst-content p.caption .headerlink,a .rst-content p .headerlink,a .rst-content table>caption .headerlink,a .rst-content tt.download span:first-child,a .wy-menu-vertical li button.toctree-expand{display:inline-block;text-decoration:inherit}.btn .fa,.btn .icon,.btn .rst-content .admonition-title,.btn .rst-content .code-block-caption .headerlink,.btn .rst-content .eqno .headerlink,.btn .rst-content code.download span:first-child,.btn .rst-content dl dt .headerlink,.btn .rst-content h1 .headerlink,.btn .rst-content h2 .headerlink,.btn .rst-content h3 .headerlink,.btn .rst-content h4 .headerlink,.btn .rst-content h5 .headerlink,.btn .rst-content h6 .headerlink,.btn .rst-content p .headerlink,.btn .rst-content table>caption .headerlink,.btn .rst-content tt.download span:first-child,.btn .wy-menu-vertical li.current>a button.toctree-expand,.btn .wy-menu-vertical li.on a button.toctree-expand,.btn .wy-menu-vertical li button.toctree-expand,.nav .fa,.nav .icon,.nav .rst-content .admonition-title,.nav .rst-content .code-block-caption .headerlink,.nav .rst-content .eqno .headerlink,.nav .rst-content code.download span:first-child,.nav .rst-content dl dt .headerlink,.nav .rst-content h1 .headerlink,.nav .rst-content h2 .headerlink,.nav .rst-content h3 .headerlink,.nav .rst-content h4 .headerlink,.nav .rst-content h5 .headerlink,.nav .rst-content h6 .headerlink,.nav .rst-content p .headerlink,.nav .rst-content table>caption .headerlink,.nav .rst-content tt.download span:first-child,.nav .wy-menu-vertical li.current>a button.toctree-expand,.nav .wy-menu-vertical li.on a button.toctree-expand,.nav .wy-menu-vertical li button.toctree-expand,.rst-content .btn .admonition-title,.rst-content .code-block-caption .btn .headerlink,.rst-content .code-block-caption .nav .headerlink,.rst-content .eqno .btn 
.headerlink,.rst-content .eqno .nav .headerlink,.rst-content .nav .admonition-title,.rst-content code.download .btn span:first-child,.rst-content code.download .nav span:first-child,.rst-content dl dt .btn .headerlink,.rst-content dl dt .nav .headerlink,.rst-content h1 .btn .headerlink,.rst-content h1 .nav .headerlink,.rst-content h2 .btn .headerlink,.rst-content h2 .nav .headerlink,.rst-content h3 .btn .headerlink,.rst-content h3 .nav .headerlink,.rst-content h4 .btn .headerlink,.rst-content h4 .nav .headerlink,.rst-content h5 .btn .headerlink,.rst-content h5 .nav .headerlink,.rst-content h6 .btn .headerlink,.rst-content h6 .nav .headerlink,.rst-content p .btn .headerlink,.rst-content p .nav .headerlink,.rst-content table>caption .btn .headerlink,.rst-content table>caption .nav .headerlink,.rst-content tt.download .btn span:first-child,.rst-content tt.download .nav span:first-child,.wy-menu-vertical li .btn button.toctree-expand,.wy-menu-vertical li.current>a .btn button.toctree-expand,.wy-menu-vertical li.current>a .nav button.toctree-expand,.wy-menu-vertical li .nav button.toctree-expand,.wy-menu-vertical li.on a .btn button.toctree-expand,.wy-menu-vertical li.on a .nav button.toctree-expand{display:inline}.btn .fa-large.icon,.btn .fa.fa-large,.btn .rst-content .code-block-caption .fa-large.headerlink,.btn .rst-content .eqno .fa-large.headerlink,.btn .rst-content .fa-large.admonition-title,.btn .rst-content code.download span.fa-large:first-child,.btn .rst-content dl dt .fa-large.headerlink,.btn .rst-content h1 .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.btn .rst-content p .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.btn .wy-menu-vertical li button.fa-large.toctree-expand,.nav 
.fa-large.icon,.nav .fa.fa-large,.nav .rst-content .code-block-caption .fa-large.headerlink,.nav .rst-content .eqno .fa-large.headerlink,.nav .rst-content .fa-large.admonition-title,.nav .rst-content code.download span.fa-large:first-child,.nav .rst-content dl dt .fa-large.headerlink,.nav .rst-content h1 .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.nav .rst-content p .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.nav .wy-menu-vertical li button.fa-large.toctree-expand,.rst-content .btn .fa-large.admonition-title,.rst-content .code-block-caption .btn .fa-large.headerlink,.rst-content .code-block-caption .nav .fa-large.headerlink,.rst-content .eqno .btn .fa-large.headerlink,.rst-content .eqno .nav .fa-large.headerlink,.rst-content .nav .fa-large.admonition-title,.rst-content code.download .btn span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.rst-content dl dt .btn .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.rst-content p .btn .fa-large.headerlink,.rst-content p .nav .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.rst-content tt.download .btn 
span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.wy-menu-vertical li .btn button.fa-large.toctree-expand,.wy-menu-vertical li .nav button.fa-large.toctree-expand{line-height:.9em}.btn .fa-spin.icon,.btn .fa.fa-spin,.btn .rst-content .code-block-caption .fa-spin.headerlink,.btn .rst-content .eqno .fa-spin.headerlink,.btn .rst-content .fa-spin.admonition-title,.btn .rst-content code.download span.fa-spin:first-child,.btn .rst-content dl dt .fa-spin.headerlink,.btn .rst-content h1 .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.btn .rst-content p .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.btn .rst-content tt.download span.fa-spin:first-child,.btn .wy-menu-vertical li button.fa-spin.toctree-expand,.nav .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .code-block-caption .fa-spin.headerlink,.nav .rst-content .eqno .fa-spin.headerlink,.nav .rst-content .fa-spin.admonition-title,.nav .rst-content code.download span.fa-spin:first-child,.nav .rst-content dl dt .fa-spin.headerlink,.nav .rst-content h1 .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.nav .rst-content p .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.nav .wy-menu-vertical li button.fa-spin.toctree-expand,.rst-content .btn .fa-spin.admonition-title,.rst-content .code-block-caption .btn .fa-spin.headerlink,.rst-content .code-block-caption .nav .fa-spin.headerlink,.rst-content .eqno .btn .fa-spin.headerlink,.rst-content .eqno .nav .fa-spin.headerlink,.rst-content .nav .fa-spin.admonition-title,.rst-content code.download 
.btn span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.rst-content dl dt .btn .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.rst-content p .btn .fa-spin.headerlink,.rst-content p .nav .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.rst-content tt.download .btn span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.wy-menu-vertical li .btn button.fa-spin.toctree-expand,.wy-menu-vertical li .nav button.fa-spin.toctree-expand{display:inline-block}.btn.fa:before,.btn.icon:before,.rst-content .btn.admonition-title:before,.rst-content .code-block-caption .btn.headerlink:before,.rst-content .eqno .btn.headerlink:before,.rst-content code.download span.btn:first-child:before,.rst-content dl dt .btn.headerlink:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content p .btn.headerlink:before,.rst-content table>caption .btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.wy-menu-vertical li button.btn.toctree-expand:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s ease-in}.btn.fa:hover:before,.btn.icon:hover:before,.rst-content 
.btn.admonition-title:hover:before,.rst-content .code-block-caption .btn.headerlink:hover:before,.rst-content .eqno .btn.headerlink:hover:before,.rst-content code.download span.btn:first-child:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content p .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.wy-menu-vertical li button.btn.toctree-expand:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .icon:before,.btn-mini .rst-content .admonition-title:before,.btn-mini .rst-content .code-block-caption .headerlink:before,.btn-mini .rst-content .eqno .headerlink:before,.btn-mini .rst-content code.download span:first-child:before,.btn-mini .rst-content dl dt .headerlink:before,.btn-mini .rst-content h1 .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.btn-mini .rst-content p .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.btn-mini .wy-menu-vertical li button.toctree-expand:before,.rst-content .btn-mini .admonition-title:before,.rst-content .code-block-caption .btn-mini .headerlink:before,.rst-content .eqno .btn-mini .headerlink:before,.rst-content code.download .btn-mini span:first-child:before,.rst-content dl dt .btn-mini .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.rst-content h4 .btn-mini 
.headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.rst-content p .btn-mini .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.rst-content tt.download .btn-mini span:first-child:before,.wy-menu-vertical li .btn-mini button.toctree-expand:before{font-size:14px;vertical-align:-15%}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.wy-alert{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.rst-content .admonition-title,.wy-alert-title{font-weight:700;display:block;color:#fff;background:#6ab0de;padding:6px 12px;margin:-12px -12px 12px}.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.admonition,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.wy-alert.wy-alert-danger{background:#fdf3f2}.rst-content .danger .admonition-title,.rst-content .danger .wy-alert-title,.rst-content .error .admonition-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .admonition-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .wy-alert-danger.hint 
.admonition-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.wy-alert.wy-alert-danger .wy-alert-title{background:#f29f97}.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .warning,.rst-content .wy-alert-warning.admonition,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.note,.rst-content .wy-alert-warning.seealso,.rst-content .wy-alert-warning.tip,.wy-alert.wy-alert-warning{background:#ffedcc}.rst-content .admonition-todo .admonition-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .attention .admonition-title,.rst-content .attention .wy-alert-title,.rst-content .caution .admonition-title,.rst-content .caution .wy-alert-title,.rst-content .warning .admonition-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.admonition .admonition-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.hint 
.wy-alert-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.wy-alert.wy-alert-warning .wy-alert-title{background:#f0b37e}.rst-content .note,.rst-content .seealso,.rst-content .wy-alert-info.admonition,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.wy-alert.wy-alert-info{background:#e7f2fa}.rst-content .note .admonition-title,.rst-content .note .wy-alert-title,.rst-content .seealso .admonition-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .admonition-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content 
.wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.wy-alert.wy-alert-info .wy-alert-title{background:#6ab0de}.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.admonition,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.warning,.wy-alert.wy-alert-success{background:#dbfaf4}.rst-content .hint .admonition-title,.rst-content .hint .wy-alert-title,.rst-content .important .admonition-title,.rst-content .important .wy-alert-title,.rst-content .tip .admonition-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .admonition-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.seealso 
.admonition-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.wy-alert.wy-alert-success .wy-alert-title{background:#1abc9c}/* Neutral alerts (any admonition carrying .wy-alert-neutral): body background #f3f6f6 */.rst-content .wy-alert-neutral.admonition,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.wy-alert.wy-alert-neutral{background:#f3f6f6}/* Neutral alert title bars (selector list continues past this line) */.rst-content .wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .admonition-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.seealso 
.wy-alert-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.wy-alert.wy-alert-neutral .wy-alert-title{color:#404040;background:#e1e4e5}/* Links inside neutral alerts */.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.wy-alert.wy-alert-neutral a{color:#2980b9}/* The last paragraph inside any alert/admonition has no bottom margin */.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .note p:last-child,.rst-content .seealso p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.wy-alert p:last-child{margin-bottom:0}/* Tray: fixed at the bottom-left; items start collapsed (opacity 0, height 0) and animate via a .3s transition */.wy-tray-container{position:fixed;bottom:0;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}/* Tray item background per severity class */.wy-tray-container li.wy-tray-item-success{background:#27ae60}.wy-tray-container li.wy-tray-item-info{background:#2980b9}.wy-tray-container li.wy-tray-item-warning{background:#e67e22}.wy-tray-container 
li.wy-tray-item-danger{background:#e74c3c}/* Tray items carrying .on are shown: full opacity, 56px tall */.wy-tray-container li.on{opacity:1;height:56px}/* At widths up to 768px the tray moves to the top and spans the full width */@media screen and (max-width:768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}/* <button> element reset; the *-prefixed declarations are legacy IE star hacks */button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}/* .btn base style; the focus outline is cleared by the .btn:focus rule further along */.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px;color:#fff;border:1px solid rgba(0,0,0,.1);background-color:#27ae60;text-decoration:none;font-weight:400;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 2px -1px hsla(0,0%,100%,.5),inset 0 -2px 0 0 rgba(0,0,0,.1);vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}/* .btn interaction states; :active swaps the top/bottom padding (8px/6px) and the inset shadow */.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:inset 0 -1px 0 0 rgba(0,0,0,.05),inset 0 2px 0 0 rgba(0,0,0,.1);padding:8px 12px 6px}.btn:visited{color:#fff}/* Disabled buttons: 40% opacity, not-allowed cursor, no shadow */.btn-disabled,.btn-disabled:active,.btn-disabled:focus,.btn-disabled:hover,.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}/* Button colour variants; most use !important to override the .btn base and state backgrounds */.btn-info{background-color:#2980b9!important}.btn-info:hover{background-color:#2e8ece!important}.btn-neutral{background-color:#f3f6f6!important;color:#404040!important}.btn-neutral:hover{background-color:#e5ebeb!important;color:#404040}.btn-neutral:visited{color:#404040!important}.btn-success{background-color:#27ae60!important}.btn-success:hover{background-color:#295!important}.btn-danger{background-color:#e74c3c!important}.btn-danger:hover{background-color:#ea6153!important}.btn-warning{background-color:#e67e22!important}.btn-warning:hover{background-color:#e98b39!important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f!important}/* Link-styled button: transparent background and border, no shadow */.btn-link{background-color:transparent!important;color:#2980b9;box-shadow:none;border-color:transparent!important}.btn-link:active,.btn-link:hover{background-color:transparent!important;color:#409ad5!important;box-shadow:none}.btn-link:visited{color:#9b59b6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}/* Button group with a :before/:after clearfix */.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:after,.wy-btn-group:before{display:table;content:""}.wy-btn-group:after{clear:both}/* Dropdown: the menu is display:none unless it sits inside .wy-dropdown-active */.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:1px solid #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,.1);padding:12px}/* Dropdown menu entries (dd children) */.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980b9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:1px solid #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search 
input[type=search]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}/* Dropdown placement/appearance modifiers: -up, -bubble, -left */.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980b9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}/* Dropdown arrow: a triangle built from borders on the :before pseudo-element */.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}/* Form layouts: stacked and aligned (aligned floats a 10em-wide label beside the control) */.wy-form-stacked select{display:block}.wy-form-aligned .wy-help-inline,.wy-form-aligned input,.wy-form-aligned label,.wy-form-aligned select,.wy-form-aligned textarea{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}/* fieldset / legend / label resets */fieldset{margin:0}fieldset,legend{border:0;padding:0}legend{width:100%;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label,legend{display:block}label{margin:0 0 
.3125em;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}/* Control group: centred (auto side margins), max-width 1200px, with a :before/:after clearfix */.wy-control-group{margin-bottom:24px;max-width:1200px;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:after,.wy-control-group:before{display:table;content:""}.wy-control-group:after{clear:both}/* Required groups append " *" in #e74c3c after the label */.wy-control-group.wy-control-group-required>label:after{content:" *";color:#e74c3c}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}/* Text-like inputs and selects fill the width of their column wrapper (selector list continues past this line) */.wy-control-group .wy-form-full input[type=color],.wy-control-group .wy-form-full input[type=date],.wy-control-group .wy-form-full input[type=datetime-local],.wy-control-group .wy-form-full input[type=datetime],.wy-control-group .wy-form-full input[type=email],.wy-control-group .wy-form-full input[type=month],.wy-control-group .wy-form-full input[type=number],.wy-control-group .wy-form-full input[type=password],.wy-control-group .wy-form-full input[type=search],.wy-control-group .wy-form-full input[type=tel],.wy-control-group .wy-form-full input[type=text],.wy-control-group .wy-form-full input[type=time],.wy-control-group .wy-form-full input[type=url],.wy-control-group .wy-form-full input[type=week],.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves input[type=color],.wy-control-group .wy-form-halves input[type=date],.wy-control-group .wy-form-halves input[type=datetime-local],.wy-control-group .wy-form-halves input[type=datetime],.wy-control-group .wy-form-halves input[type=email],.wy-control-group .wy-form-halves input[type=month],.wy-control-group .wy-form-halves input[type=number],.wy-control-group .wy-form-halves input[type=password],.wy-control-group .wy-form-halves input[type=search],.wy-control-group .wy-form-halves input[type=tel],.wy-control-group .wy-form-halves input[type=text],.wy-control-group .wy-form-halves input[type=time],.wy-control-group .wy-form-halves input[type=url],.wy-control-group 
.wy-form-halves input[type=week],.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds input[type=color],.wy-control-group .wy-form-thirds input[type=date],.wy-control-group .wy-form-thirds input[type=datetime-local],.wy-control-group .wy-form-thirds input[type=datetime],.wy-control-group .wy-form-thirds input[type=email],.wy-control-group .wy-form-thirds input[type=month],.wy-control-group .wy-form-thirds input[type=number],.wy-control-group .wy-form-thirds input[type=password],.wy-control-group .wy-form-thirds input[type=search],.wy-control-group .wy-form-thirds input[type=tel],.wy-control-group .wy-form-thirds input[type=text],.wy-control-group .wy-form-thirds input[type=time],.wy-control-group .wy-form-thirds input[type=url],.wy-control-group .wy-form-thirds input[type=week],.wy-control-group .wy-form-thirds select{width:100%}/* Floated column wrappers: full = 100%, halves = 48.82117%, thirds = 31.76157%; halves and thirds use a 2.35765% right gutter that is removed on the last item of each row */.wy-control-group .wy-form-full{float:left;display:block;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child,.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(odd){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child,.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}/* Controls without an input */.wy-control-group.wy-control-group-no-input .wy-control,.wy-control-no-input{margin:6px 0 0;font-size:90%}.wy-control-no-input{display:inline-block}/* .fluid-input groups: text-like inputs take the full width (selector list continues past this line) */.wy-control-group.fluid-input input[type=color],.wy-control-group.fluid-input input[type=date],.wy-control-group.fluid-input input[type=datetime-local],.wy-control-group.fluid-input input[type=datetime],.wy-control-group.fluid-input input[type=email],.wy-control-group.fluid-input 
input[type=month],.wy-control-group.fluid-input input[type=number],.wy-control-group.fluid-input input[type=password],.wy-control-group.fluid-input input[type=search],.wy-control-group.fluid-input input[type=tel],.wy-control-group.fluid-input input[type=text],.wy-control-group.fluid-input input[type=time],.wy-control-group.fluid-input input[type=url],.wy-control-group.fluid-input input[type=week]{width:100%}/* Form help messages: inline and block variants */.wy-form-message-inline{padding-left:.3em;color:#666;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}/* Base input styling */input{line-height:normal}input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;*overflow:visible}/* Text-like inputs: native appearance removed, 1px #ccc border, inset shadow, border colour animated over .3s */input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}input[type=datetime-local]{padding:.34375em 
.625em}input[disabled]{cursor:default}/* Checkbox / radio sizing and border-box sizing */input[type=checkbox],input[type=radio]{padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type=checkbox],input[type=radio],input[type=search]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type=search]::-webkit-search-cancel-button,input[type=search]::-webkit-search-decoration{-webkit-appearance:none}/* Focus: text-like inputs drop the outline and darken the border to #333; the \9 declaration is a legacy IE hack */input[type=color]:focus,input[type=date]:focus,input[type=datetime-local]:focus,input[type=datetime]:focus,input[type=email]:focus,input[type=month]:focus,input[type=number]:focus,input[type=password]:focus,input[type=search]:focus,input[type=tel]:focus,input[type=text]:focus,input[type=time]:focus,input[type=url]:focus,input[type=week]:focus{outline:0;outline:thin dotted\9;border-color:#333}input.no-focus:focus{border-color:#ccc!important}input[type=checkbox]:focus,input[type=file]:focus,input[type=radio]:focus{outline:thin dotted #333;outline:1px auto #129fea}/* Disabled text-like inputs */input[type=color][disabled],input[type=date][disabled],input[type=datetime-local][disabled],input[type=datetime][disabled],input[type=email][disabled],input[type=month][disabled],input[type=number][disabled],input[type=password][disabled],input[type=search][disabled],input[type=tel][disabled],input[type=text][disabled],input[type=time][disabled],input[type=url][disabled],input[type=week][disabled]{cursor:not-allowed;background-color:#fafafa}/* Focused controls that fail validation are marked with #e74c3c */input:focus:invalid,select:focus:invalid,textarea:focus:invalid{color:#e74c3c;border:1px solid #e74c3c}input:focus:invalid:focus,select:focus:invalid:focus,textarea:focus:invalid:focus{border-color:#e74c3c}input[type=checkbox]:focus:invalid:focus,input[type=file]:focus:invalid:focus,input[type=radio]:focus:invalid:focus{outline-color:#e74c3c}input.wy-input-large{padding:12px;font-size:100%}/* textarea and select */textarea{overflow:auto;vertical-align:top;width:100%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid 
#ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}input[readonly],select[disabled],select[readonly],textarea[disabled],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type=checkbox][disabled],input[type=radio][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:1px solid #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{left:0;top:0;width:36px;height:12px;background:#ccc}.wy-switch:after,.wy-switch:before{position:absolute;content:"";display:block;border-radius:4px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{width:18px;height:18px;background:#999;left:-3px;top:-3px}.wy-switch span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27ae60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#e74c3c}.wy-control-group.wy-control-group-error input[type=color],.wy-control-group.wy-control-group-error 
input[type=date],.wy-control-group.wy-control-group-error input[type=datetime-local],.wy-control-group.wy-control-group-error input[type=datetime],.wy-control-group.wy-control-group-error input[type=email],.wy-control-group.wy-control-group-error input[type=month],.wy-control-group.wy-control-group-error input[type=number],.wy-control-group.wy-control-group-error input[type=password],.wy-control-group.wy-control-group-error input[type=search],.wy-control-group.wy-control-group-error input[type=tel],.wy-control-group.wy-control-group-error input[type=text],.wy-control-group.wy-control-group-error input[type=time],.wy-control-group.wy-control-group-error input[type=url],.wy-control-group.wy-control-group-error input[type=week],.wy-control-group.wy-control-group-error textarea{border:1px solid #e74c3c}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27ae60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#e74c3c}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#e67e22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980b9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) 
rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width:480px){.wy-form button[type=submit]{margin:.7em 0 0}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=text],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week],.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0}.wy-form-message,.wy-form-message-inline,.wy-form .wy-help-inline{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width:768px){.tablet-hide{display:none}}@media screen and (max-width:480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.rst-content table.docutils,.rst-content 
table.field-list,.wy-table{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.rst-content table.docutils caption,.rst-content table.field-list caption,.wy-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.rst-content table.docutils td,.rst-content table.docutils th,.rst-content table.field-list td,.rst-content table.field-list th,.wy-table td,.wy-table th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.rst-content table.docutils td:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list td:first-child,.rst-content table.field-list th:first-child,.wy-table td:first-child,.wy-table th:first-child{border-left-width:0}.rst-content table.docutils thead,.rst-content table.field-list thead,.wy-table thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.rst-content table.docutils thead th,.rst-content table.field-list thead th,.wy-table thead th{font-weight:700;border-bottom:2px solid #e1e4e5}.rst-content table.docutils td,.rst-content table.field-list td,.wy-table td{background-color:transparent;vertical-align:middle}.rst-content table.docutils td p,.rst-content table.field-list td p,.wy-table td p{line-height:18px}.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child,.wy-table td p:last-child{margin-bottom:0}.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min,.wy-table .wy-table-cell-min{width:1%;padding-right:0}.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:grey;font-size:90%}.wy-table-tertiary{color:grey;font-size:80%}.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,.wy-table-backed,.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) 
td{background-color:#f3f6f6}.rst-content table.docutils,.wy-table-bordered-all{border:1px solid #e1e4e5}.rst-content table.docutils td,.wy-table-bordered-all td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.rst-content table.docutils tbody>tr:last-child td,.wy-table-bordered-all tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive table{margin-bottom:0!important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980b9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9b59b6}html{height:100%}body,html{overflow-x:hidden}body{font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;font-weight:400;color:#404040;min-height:100%;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#e67e22!important}a.wy-text-warning:hover{color:#eb9950!important}.wy-text-info{color:#2980b9!important}a.wy-text-info:hover{color:#409ad5!important}.wy-text-success{color:#27ae60!important}a.wy-text-success:hover{color:#36d278!important}.wy-text-danger{color:#e74c3c!important}a.wy-text-danger:hover{color:#ed7669!important}.wy-text-neutral{color:#404040!important}a.wy-text-neutral:hover{color:#595959!important}.rst-content .toctree-wrapper>p.caption,h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:Roboto 
Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif}p{line-height:24px;font-size:16px;margin:0 0 24px}h1{font-size:175%}.rst-content .toctree-wrapper>p.caption,h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}.rst-content code,.rst-content tt,code{white-space:nowrap;max-width:100%;background:#fff;border:1px solid #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#e74c3c;overflow-x:auto}.rst-content tt.code-large,code.code-large{font-size:90%}.rst-content .section ul,.rst-content .toctree-wrapper ul,.rst-content section ul,.wy-plain-list-disc,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.rst-content .section ul li,.rst-content .toctree-wrapper ul li,.rst-content section ul li,.wy-plain-list-disc li,article ul li{list-style:disc;margin-left:24px}.rst-content .section ul li p:last-child,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li p:last-child,.rst-content .toctree-wrapper ul li ul,.rst-content section ul li p:last-child,.rst-content section ul li ul,.wy-plain-list-disc li p:last-child,.wy-plain-list-disc li ul,article ul li p:last-child,article ul li ul{margin-bottom:0}.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,.rst-content section ul li li,.wy-plain-list-disc li li,article ul li li{list-style:circle}.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,.rst-content section ul li li li,.wy-plain-list-disc li li li,article ul li li li{list-style:square}.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,.rst-content section ul li ol li,.wy-plain-list-disc li ol li,article ul li ol li{list-style:decimal}.rst-content .section ol,.rst-content .section ol.arabic,.rst-content .toctree-wrapper ol,.rst-content .toctree-wrapper ol.arabic,.rst-content 
section ol,.rst-content section ol.arabic,.wy-plain-list-decimal,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.rst-content .section ol.arabic li,.rst-content .section ol li,.rst-content .toctree-wrapper ol.arabic li,.rst-content .toctree-wrapper ol li,.rst-content section ol.arabic li,.rst-content section ol li,.wy-plain-list-decimal li,article ol li{list-style:decimal;margin-left:24px}.rst-content .section ol.arabic li ul,.rst-content .section ol li p:last-child,.rst-content .section ol li ul,.rst-content .toctree-wrapper ol.arabic li ul,.rst-content .toctree-wrapper ol li p:last-child,.rst-content .toctree-wrapper ol li ul,.rst-content section ol.arabic li ul,.rst-content section ol li p:last-child,.rst-content section ol li ul,.wy-plain-list-decimal li p:last-child,.wy-plain-list-decimal li ul,article ol li p:last-child,article ol li ul{margin-bottom:0}.rst-content .section ol.arabic li ul li,.rst-content .section ol li ul li,.rst-content .toctree-wrapper ol.arabic li ul li,.rst-content .toctree-wrapper ol li ul li,.rst-content section ol.arabic li ul li,.rst-content section ol li ul li,.wy-plain-list-decimal li ul li,article ol li ul li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:after,.wy-breadcrumbs:before{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs>li{display:inline-block;padding-top:5px}.wy-breadcrumbs>li.wy-breadcrumbs-aside{float:right}.rst-content .wy-breadcrumbs>li code,.rst-content .wy-breadcrumbs>li tt,.wy-breadcrumbs>li .rst-content tt,.wy-breadcrumbs>li code{all:inherit;color:inherit}.breadcrumb-item:before{content:"/";color:#bbb;font-size:13px;padding:0 6px 0 3px}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width:480px){.wy-breadcrumbs-extra,.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs 
li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:after,.wy-menu-horiz:before{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz li,.wy-menu-horiz ul{display:inline-block}.wy-menu-horiz li:hover{background:hsla(0,0%,100%,.1)}.wy-menu-horiz li.divide-left{border-left:1px solid #404040}.wy-menu-horiz li.divide-right{border-right:1px solid #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{color:#55a5d9;height:32px;line-height:32px;padding:0 1.618em;margin:12px 0 0;display:block;font-weight:700;text-transform:uppercase;font-size:85%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:1px solid #404040}.wy-menu-vertical li.divide-bottom{border-bottom:1px solid #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:grey;border-right:1px solid #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.rst-content .wy-menu-vertical li tt,.wy-menu-vertical li .rst-content tt,.wy-menu-vertical li code{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li button.toctree-expand{display:block;float:left;margin-left:-1.2em;line-height:18px;color:#4d4d4d;border:none;background:none;padding:0}.wy-menu-vertical li.current>a,.wy-menu-vertical li.on a{color:#404040;font-weight:700;position:relative;background:#fcfcfc;border:none;padding:.4045em 1.618em}.wy-menu-vertical li.current>a:hover,.wy-menu-vertical li.on a:hover{background:#fcfcfc}.wy-menu-vertical li.current>a:hover button.toctree-expand,.wy-menu-vertical li.on a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a 
button.toctree-expand{display:block;line-height:18px;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:1px solid #c9c9c9;border-top:1px solid #c9c9c9}.wy-menu-vertical .toctree-l1.current .toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .toctree-l11>ul{display:none}.wy-menu-vertical .toctree-l1.current .current.toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .current.toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .current.toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .current.toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .current.toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .current.toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .current.toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .current.toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .current.toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .current.toctree-l11>ul{display:block}.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a,.wy-menu-vertical li.toctree-l5 a,.wy-menu-vertical li.toctree-l6 a,.wy-menu-vertical li.toctree-l7 a,.wy-menu-vertical li.toctree-l8 a,.wy-menu-vertical li.toctree-l9 a,.wy-menu-vertical li.toctree-l10 a{color:#404040}.wy-menu-vertical li.toctree-l2 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l3 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l4 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l5 a:hover 
button.toctree-expand,.wy-menu-vertical li.toctree-l6 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l7 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l8 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l9 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l10 a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a,.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a,.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a,.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a,.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a,.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a,.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a,.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{display:block}.wy-menu-vertical li.toctree-l2.current>a{padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{padding:.4045em 1.618em .4045em 4.045em}.wy-menu-vertical li.toctree-l3.current>a{padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{padding:.4045em 1.618em .4045em 5.663em}.wy-menu-vertical li.toctree-l4.current>a{padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a{padding:.4045em 1.618em .4045em 7.281em}.wy-menu-vertical li.toctree-l5.current>a{padding:.4045em 7.281em}.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a{padding:.4045em 1.618em .4045em 8.899em}.wy-menu-vertical li.toctree-l6.current>a{padding:.4045em 8.899em}.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a{padding:.4045em 1.618em .4045em 10.517em}.wy-menu-vertical li.toctree-l7.current>a{padding:.4045em 10.517em}.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a{padding:.4045em 1.618em .4045em 12.135em}.wy-menu-vertical li.toctree-l8.current>a{padding:.4045em 12.135em}.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a{padding:.4045em 1.618em .4045em 
13.753em}.wy-menu-vertical li.toctree-l9.current>a{padding:.4045em 13.753em}.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a{padding:.4045em 1.618em .4045em 15.371em}.wy-menu-vertical li.toctree-l10.current>a{padding:.4045em 15.371em}.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{padding:.4045em 1.618em .4045em 16.989em}.wy-menu-vertical li.toctree-l2.current>a,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{background:#c9c9c9}.wy-menu-vertical li.toctree-l2 button.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3.current>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{background:#bdbdbd}.wy-menu-vertical li.toctree-l3 button.toctree-expand{color:#969696}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:400}.wy-menu-vertical a{line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover button.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980b9;cursor:pointer;color:#fff}.wy-menu-vertical a:active button.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980b9;text-align:center;color:#fcfcfc}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-side-nav-search .wy-dropdown>a,.wy-side-nav-search>a{color:#fcfcfc;font-size:100%;font-weight:700;display:inline-block;padding:4px 6px;margin-bottom:.809em;max-width:100%}.wy-side-nav-search .wy-dropdown>a:hover,.wy-side-nav-search>a:hover{background:hsla(0,0%,100%,.1)}.wy-side-nav-search .wy-dropdown>a 
img.logo,.wy-side-nav-search>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search .wy-dropdown>a.icon img.logo,.wy-side-nav-search>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.version{margin-top:-.4045em;margin-bottom:.809em;font-weight:400;color:hsla(0,0%,100%,.3)}.wy-nav .wy-menu-vertical header{color:#2980b9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980b9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980b9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:after,.wy-nav-top:before{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:700}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 
3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:grey}footer p{margin-bottom:12px}.rst-content footer span.commit tt,footer span.commit .rst-content tt,footer span.commit code{padding:0;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:1em;background:none;border:none;color:grey}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:after,.rst-footer-buttons:before{width:100%;display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:after,.rst-breadcrumbs-buttons:before{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:1px solid #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:1px solid #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:grey;font-size:90%}.genindextable li>ul{margin-left:24px}@media screen and (max-width:768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-menu.wy-menu-vertical,.wy-side-nav-search,.wy-side-scroll{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width:1100px){.wy-nav-content-wrap{background:rgba(0,0,0,.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,.wy-nav-side,footer{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions 
a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60;*zoom:1}.rst-versions .rst-current-version:after,.rst-versions .rst-current-version:before{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-content .code-block-caption .rst-versions .rst-current-version .headerlink,.rst-content .eqno .rst-versions .rst-current-version .headerlink,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-content p .rst-versions .rst-current-version .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .icon,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-versions .rst-current-version .rst-content .code-block-caption .headerlink,.rst-versions .rst-current-version .rst-content .eqno .headerlink,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-versions 
.rst-current-version .rst-content h4 .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-versions .rst-current-version .rst-content p .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-versions .rst-current-version .wy-menu-vertical li button.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version button.toctree-expand{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}.rst-content 
.toctree-wrapper>p.caption,.rst-content h1,.rst-content h2,.rst-content h3,.rst-content h4,.rst-content h5,.rst-content h6{margin-bottom:24px}.rst-content img{max-width:100%;height:auto}.rst-content div.figure,.rst-content figure{margin-bottom:24px}.rst-content div.figure .caption-text,.rst-content figure .caption-text{font-style:italic}.rst-content div.figure p:last-child.caption,.rst-content figure p:last-child.caption{margin-bottom:0}.rst-content div.figure.align-center,.rst-content figure.align-center{text-align:center}.rst-content .section>a>img,.rst-content .section>img,.rst-content section>a>img,.rst-content section>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"\f08e";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;display:block;overflow:auto}.rst-content div[class^=highlight],.rst-content pre.literal-block{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px}.rst-content div[class^=highlight] div[class^=highlight],.rst-content pre.literal-block div[class^=highlight]{padding:0;border:none;margin:0}.rst-content div[class^=highlight] td.code{width:100%}.rst-content .linenodiv pre{border-right:1px solid #e6e9ea;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^=highlight] pre{white-space:pre;margin:0;padding:12px;display:block;overflow:auto}.rst-content div[class^=highlight] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content .linenodiv pre,.rst-content div[class^=highlight] pre,.rst-content 
pre.literal-block{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:12px;line-height:1.4}.rst-content div.highlight .gp,.rst-content div.highlight span.linenos{user-select:none;pointer-events:none}.rst-content div.highlight span.linenos{display:inline-block;padding-left:0;padding-right:12px;margin-right:12px;border-right:1px solid #e6e9ea}.rst-content .code-block-caption{font-style:italic;font-size:85%;line-height:1;padding:1em 0;text-align:center}@media print{.rst-content .codeblock,.rst-content div[class^=highlight],.rst-content div[class^=highlight] pre{white-space:pre-wrap}}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning{clear:both}.rst-content .admonition-todo .last,.rst-content .admonition-todo>:last-child,.rst-content .admonition .last,.rst-content .admonition>:last-child,.rst-content .attention .last,.rst-content .attention>:last-child,.rst-content .caution .last,.rst-content .caution>:last-child,.rst-content .danger .last,.rst-content .danger>:last-child,.rst-content .error .last,.rst-content .error>:last-child,.rst-content .hint .last,.rst-content .hint>:last-child,.rst-content .important .last,.rst-content .important>:last-child,.rst-content .note .last,.rst-content .note>:last-child,.rst-content .seealso .last,.rst-content .seealso>:last-child,.rst-content .tip .last,.rst-content .tip>:last-child,.rst-content .warning .last,.rst-content .warning>:last-child{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent!important;border-color:rgba(0,0,0,.1)!important}.rst-content .section ol.loweralpha,.rst-content .section 
ol.loweralpha>li,.rst-content .toctree-wrapper ol.loweralpha,.rst-content .toctree-wrapper ol.loweralpha>li,.rst-content section ol.loweralpha,.rst-content section ol.loweralpha>li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha>li,.rst-content .toctree-wrapper ol.upperalpha,.rst-content .toctree-wrapper ol.upperalpha>li,.rst-content section ol.upperalpha,.rst-content section ol.upperalpha>li{list-style:upper-alpha}.rst-content .section ol li>*,.rst-content .section ul li>*,.rst-content .toctree-wrapper ol li>*,.rst-content .toctree-wrapper ul li>*,.rst-content section ol li>*,.rst-content section ul li>*{margin-top:12px;margin-bottom:12px}.rst-content .section ol li>:first-child,.rst-content .section ul li>:first-child,.rst-content .toctree-wrapper ol li>:first-child,.rst-content .toctree-wrapper ul li>:first-child,.rst-content section ol li>:first-child,.rst-content section ul li>:first-child{margin-top:0}.rst-content .section ol li>p,.rst-content .section ol li>p:last-child,.rst-content .section ul li>p,.rst-content .section ul li>p:last-child,.rst-content .toctree-wrapper ol li>p,.rst-content .toctree-wrapper ol li>p:last-child,.rst-content .toctree-wrapper ul li>p,.rst-content .toctree-wrapper ul li>p:last-child,.rst-content section ol li>p,.rst-content section ol li>p:last-child,.rst-content section ul li>p,.rst-content section ul li>p:last-child{margin-bottom:12px}.rst-content .section ol li>p:only-child,.rst-content .section ol li>p:only-child:last-child,.rst-content .section ul li>p:only-child,.rst-content .section ul li>p:only-child:last-child,.rst-content .toctree-wrapper ol li>p:only-child,.rst-content .toctree-wrapper ol li>p:only-child:last-child,.rst-content .toctree-wrapper ul li>p:only-child,.rst-content .toctree-wrapper ul li>p:only-child:last-child,.rst-content section ol li>p:only-child,.rst-content section ol li>p:only-child:last-child,.rst-content section ul li>p:only-child,.rst-content section ul 
li>p:only-child:last-child{margin-bottom:0}.rst-content .section ol li>ol,.rst-content .section ol li>ul,.rst-content .section ul li>ol,.rst-content .section ul li>ul,.rst-content .toctree-wrapper ol li>ol,.rst-content .toctree-wrapper ol li>ul,.rst-content .toctree-wrapper ul li>ol,.rst-content .toctree-wrapper ul li>ul,.rst-content section ol li>ol,.rst-content section ol li>ul,.rst-content section ul li>ol,.rst-content section ul li>ul{margin-bottom:12px}.rst-content .section ol.simple li>*,.rst-content .section ol.simple li ol,.rst-content .section ol.simple li ul,.rst-content .section ul.simple li>*,.rst-content .section ul.simple li ol,.rst-content .section ul.simple li ul,.rst-content .toctree-wrapper ol.simple li>*,.rst-content .toctree-wrapper ol.simple li ol,.rst-content .toctree-wrapper ol.simple li ul,.rst-content .toctree-wrapper ul.simple li>*,.rst-content .toctree-wrapper ul.simple li ol,.rst-content .toctree-wrapper ul.simple li ul,.rst-content section ol.simple li>*,.rst-content section ol.simple li ol,.rst-content section ol.simple li ul,.rst-content section ul.simple li>*,.rst-content section ul.simple li ol,.rst-content section ul.simple li ul{margin-top:0;margin-bottom:0}.rst-content .line-block{margin-left:0;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0}.rst-content .topic-title{font-weight:700;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0 0 24px 24px}.rst-content .align-left{float:left;margin:0 24px 24px 0}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 
.headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink{opacity:0;font-size:14px;font-family:FontAwesome;margin-left:.5em}.rst-content .code-block-caption .headerlink:focus,.rst-content .code-block-caption:hover .headerlink,.rst-content .eqno .headerlink:focus,.rst-content .eqno:hover .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink:focus,.rst-content .toctree-wrapper>p.caption:hover .headerlink,.rst-content dl dt .headerlink:focus,.rst-content dl dt:hover .headerlink,.rst-content h1 .headerlink:focus,.rst-content h1:hover .headerlink,.rst-content h2 .headerlink:focus,.rst-content h2:hover .headerlink,.rst-content h3 .headerlink:focus,.rst-content h3:hover .headerlink,.rst-content h4 .headerlink:focus,.rst-content h4:hover .headerlink,.rst-content h5 .headerlink:focus,.rst-content h5:hover .headerlink,.rst-content h6 .headerlink:focus,.rst-content h6:hover .headerlink,.rst-content p.caption .headerlink:focus,.rst-content p.caption:hover .headerlink,.rst-content p .headerlink:focus,.rst-content p:hover .headerlink,.rst-content table>caption .headerlink:focus,.rst-content table>caption:hover .headerlink{opacity:1}.rst-content p a{overflow-wrap:anywhere}.rst-content .wy-table td p,.rst-content .wy-table td ul,.rst-content .wy-table th p,.rst-content .wy-table th ul,.rst-content table.docutils td p,.rst-content table.docutils td ul,.rst-content table.docutils th p,.rst-content table.docutils th ul,.rst-content table.field-list td p,.rst-content table.field-list td ul,.rst-content table.field-list th p,.rst-content table.field-list th ul{font-size:inherit}.rst-content .btn:focus{outline:2px solid}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:1px solid #e1e4e5}.rst-content .sidebar dl,.rst-content .sidebar p,.rst-content .sidebar 
ul{font-size:90%}.rst-content .sidebar .last,.rst-content .sidebar>:last-child{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif;font-weight:700;background:#e1e4e5;padding:6px 12px;margin:-24px -24px 24px;font-size:100%}.rst-content .highlighted{background:#f1c40f;box-shadow:0 0 0 2px #f1c40f;display:inline;font-weight:700}.rst-content .citation-reference,.rst-content .footnote-reference{vertical-align:baseline;position:relative;top:-.4em;line-height:0;font-size:90%}.rst-content .citation-reference>span.fn-bracket,.rst-content .footnote-reference>span.fn-bracket{display:none}.rst-content .hlist{width:100%}.rst-content dl dt span.classifier:before{content:" : "}.rst-content dl dt span.classifier-delimiter{display:none!important}html.writer-html4 .rst-content table.docutils.citation,html.writer-html4 .rst-content table.docutils.footnote{background:none;border:none}html.writer-html4 .rst-content table.docutils.citation td,html.writer-html4 .rst-content table.docutils.citation tr,html.writer-html4 .rst-content table.docutils.footnote td,html.writer-html4 .rst-content table.docutils.footnote tr{border:none;background-color:transparent!important;white-space:normal}html.writer-html4 .rst-content table.docutils.citation td.label,html.writer-html4 .rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{display:grid;grid-template-columns:auto minmax(80%,95%)}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{display:inline-grid;grid-template-columns:max-content auto}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{display:grid;grid-template-columns:auto 
auto minmax(.65rem,auto) minmax(40%,95%)}html.writer-html5 .rst-content aside.citation>span.label,html.writer-html5 .rst-content aside.footnote>span.label,html.writer-html5 .rst-content div.citation>span.label{grid-column-start:1;grid-column-end:2}html.writer-html5 .rst-content aside.citation>span.backrefs,html.writer-html5 .rst-content aside.footnote>span.backrefs,html.writer-html5 .rst-content div.citation>span.backrefs{grid-column-start:2;grid-column-end:3;grid-row-start:1;grid-row-end:3}html.writer-html5 .rst-content aside.citation>p,html.writer-html5 .rst-content aside.footnote>p,html.writer-html5 .rst-content div.citation>p{grid-column-start:4;grid-column-end:5}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{margin-bottom:24px}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{padding-left:1rem}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dd,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dd,html.writer-html5 .rst-content dl.footnote>dt{margin-bottom:0}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{font-size:.9rem}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.footnote>dt{margin:0 .5rem .5rem 0;line-height:1.2rem;word-break:break-all;font-weight:400}html.writer-html5 .rst-content dl.citation>dt>span.brackets:before,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:before{content:"["}html.writer-html5 .rst-content dl.citation>dt>span.brackets:after,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:after{content:"]"}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref,html.writer-html5 .rst-content 
dl.footnote>dt>span.fn-backref{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a{word-break:keep-all}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a:not(:first-child):before,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.footnote>dd{margin:0 0 .5rem;line-height:1.2rem}html.writer-html5 .rst-content dl.citation>dd p,html.writer-html5 .rst-content dl.footnote>dd p{font-size:.9rem}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{padding-left:1rem;padding-right:1rem;font-size:.9rem;line-height:1.2rem}html.writer-html5 .rst-content aside.citation p,html.writer-html5 .rst-content aside.footnote p,html.writer-html5 .rst-content div.citation p{font-size:.9rem;line-height:1.2rem;margin-bottom:12px}html.writer-html5 .rst-content aside.citation span.backrefs,html.writer-html5 .rst-content aside.footnote span.backrefs,html.writer-html5 .rst-content div.citation span.backrefs{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content aside.citation span.backrefs>a,html.writer-html5 .rst-content aside.footnote span.backrefs>a,html.writer-html5 .rst-content div.citation span.backrefs>a{word-break:keep-all}html.writer-html5 .rst-content aside.citation span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content aside.footnote span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content div.citation span.backrefs>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content aside.citation span.label,html.writer-html5 .rst-content aside.footnote 
span.label,html.writer-html5 .rst-content div.citation span.label{line-height:1.2rem}html.writer-html5 .rst-content aside.citation-list,html.writer-html5 .rst-content aside.footnote-list,html.writer-html5 .rst-content div.citation-list{margin-bottom:24px}html.writer-html5 .rst-content dl.option-list kbd{font-size:.9rem}.rst-content table.docutils.footnote,html.writer-html4 .rst-content table.docutils.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content aside.footnote-list aside.footnote,html.writer-html5 .rst-content div.citation-list>div.citation,html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{color:grey}.rst-content table.docutils.footnote code,.rst-content table.docutils.footnote tt,html.writer-html4 .rst-content table.docutils.citation code,html.writer-html4 .rst-content table.docutils.citation tt,html.writer-html5 .rst-content aside.footnote-list aside.footnote code,html.writer-html5 .rst-content aside.footnote-list aside.footnote tt,html.writer-html5 .rst-content aside.footnote code,html.writer-html5 .rst-content aside.footnote tt,html.writer-html5 .rst-content div.citation-list>div.citation code,html.writer-html5 .rst-content div.citation-list>div.citation tt,html.writer-html5 .rst-content dl.citation code,html.writer-html5 .rst-content dl.citation tt,html.writer-html5 .rst-content dl.footnote code,html.writer-html5 .rst-content dl.footnote tt{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content .wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}html.writer-html5 .rst-content table.docutils th{border:1px solid #e1e4e5}html.writer-html5 .rst-content table.docutils 
td>p,html.writer-html5 .rst-content table.docutils th>p{line-height:1rem;margin-bottom:0;font-size:.9rem}.rst-content table.docutils td .last,.rst-content table.docutils td .last>:last-child{margin-bottom:0}.rst-content table.field-list,.rst-content table.field-list td{border:none}.rst-content table.field-list td p{line-height:inherit}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content code,.rst-content tt{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;padding:2px 5px}.rst-content code big,.rst-content code em,.rst-content tt big,.rst-content tt em{font-size:100%!important;line-height:normal}.rst-content code.literal,.rst-content tt.literal{color:#e74c3c;white-space:normal}.rst-content code.xref,.rst-content tt.xref,a .rst-content code,a .rst-content tt{font-weight:700;color:#404040;overflow-wrap:normal}.rst-content kbd,.rst-content pre,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace}.rst-content a code,.rst-content a tt{color:#2980b9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:700;margin-bottom:12px}.rst-content dl ol,.rst-content dl p,.rst-content dl table,.rst-content dl ul{margin-bottom:12px}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content dl dd>ol:last-child,.rst-content dl dd>p:last-child,.rst-content dl dd>table:last-child,.rst-content dl dd>ul:last-child{margin-bottom:0}html.writer-html4 .rst-content dl:not(.docutils),html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple){margin-bottom:24px}html.writer-html4 .rst-content dl:not(.docutils)>dt,html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980b9;border-top:3px solid #6ab0de;padding:6px;position:relative}html.writer-html4 .rst-content dl:not(.docutils)>dt:before,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:before{color:#6ab0de}html.writer-html4 .rst-content dl:not(.docutils)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{margin-bottom:6px;border:none;border-left:3px solid #ccc;background:#f0f0f0;color:#555}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils)>dt:first-child,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:first-child{margin-top:0}html.writer-html4 .rst-content dl:not(.docutils) code.descclassname,html.writer-html4 .rst-content dl:not(.docutils) 
code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descclassname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{background-color:transparent;border:none;padding:0;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .optional,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .property,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .property{display:inline-block;padding-right:8px;max-width:100%}html.writer-html4 .rst-content dl:not(.docutils) .k,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .k{font-style:italic}html.writer-html4 
.rst-content dl:not(.docutils) .descclassname,html.writer-html4 .rst-content dl:not(.docutils) .descname,html.writer-html4 .rst-content dl:not(.docutils) .sig-name,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .sig-name{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#000}.rst-content .viewcode-back,.rst-content .viewcode-link{display:inline-block;color:#27ae60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:700}.rst-content code.download,.rst-content tt.download{background:inherit;padding:inherit;font-weight:400;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content code.download span:first-child,.rst-content tt.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{margin-right:4px}.rst-content .guilabel{border:1px solid #7fbbe3;background:#e7f2fa;font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>.kbd,.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>kbd{color:inherit;font-size:80%;background-color:#fff;border:1px solid #a6a6a6;border-radius:4px;box-shadow:0 2px grey;padding:2.4px 6px;margin:auto 0}.rst-content .versionmodified{font-style:italic}@media screen and (max-width:480px){.rst-content 
.sidebar{width:100%}}span[id*=MathJax-Span]{color:#404040}.math{text-align:center}@font-face{font-family:Lato;src:url(fonts/lato-normal.woff2?bd03a2cc277bbbc338d464e679fe9942) format("woff2"),url(fonts/lato-normal.woff?27bd77b9162d388cb8d4c4217c7c5e2a) format("woff");font-weight:400;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold.woff2?cccb897485813c7c256901dbca54ecf2) format("woff2"),url(fonts/lato-bold.woff?d878b6c29b10beca227e9eef4246111b) format("woff");font-weight:700;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold-italic.woff2?0b6bb6725576b072c5d0b02ecdd1900d) format("woff2"),url(fonts/lato-bold-italic.woff?9c7e4e9eb485b4a121c760e61bc3707c) format("woff");font-weight:700;font-style:italic;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-normal-italic.woff2?4eb103b4d12be57cb1d040ed5e162e9d) format("woff2"),url(fonts/lato-normal-italic.woff?f28f2d6482446544ef1ea1ccc6dd5892) format("woff");font-weight:400;font-style:italic;font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:400;src:url(fonts/Roboto-Slab-Regular.woff2?7abf5b8d04d26a2cafea937019bca958) format("woff2"),url(fonts/Roboto-Slab-Regular.woff?c1be9284088d487c5e3ff0a10a92e58c) format("woff");font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:700;src:url(fonts/Roboto-Slab-Bold.woff2?9984f4a9bda09be08e83f2506954adbe) format("woff2"),url(fonts/Roboto-Slab-Bold.woff?bed5564a116b05148e3b3bea6fb1162a) format("woff");font-display:block} diff --git a/css/theme_extra.css b/css/theme_extra.css new file mode 100644 index 00000000..9f4b063c --- /dev/null +++ b/css/theme_extra.css @@ -0,0 +1,191 @@ +/* + * Wrap inline code samples otherwise they shoot of the side and + * can't be read at all. 
+ * + * https://github.com/mkdocs/mkdocs/issues/313 + * https://github.com/mkdocs/mkdocs/issues/233 + * https://github.com/mkdocs/mkdocs/issues/834 + */ +.rst-content code { + white-space: pre-wrap; + word-wrap: break-word; + padding: 2px 5px; +} + +/** + * Make code blocks display as blocks and give them the appropriate + * font size and padding. + * + * https://github.com/mkdocs/mkdocs/issues/855 + * https://github.com/mkdocs/mkdocs/issues/834 + * https://github.com/mkdocs/mkdocs/issues/233 + */ +.rst-content pre code { + white-space: pre; + word-wrap: normal; + display: block; + padding: 12px; + font-size: 12px; +} + +/** + * Fix code colors + * + * https://github.com/mkdocs/mkdocs/issues/2027 + */ +.rst-content code { + color: #E74C3C; +} + +.rst-content pre code { + color: #000; + background: #f8f8f8; +} + +/* + * Fix link colors when the link text is inline code. + * + * https://github.com/mkdocs/mkdocs/issues/718 + */ +a code { + color: #2980B9; +} +a:hover code { + color: #3091d1; +} +a:visited code { + color: #9B59B6; +} + +/* + * The CSS classes from highlight.js seem to clash with the + * ReadTheDocs theme causing some code to be incorrectly made + * bold and italic. + * + * https://github.com/mkdocs/mkdocs/issues/411 + */ +pre .cs, pre .c { + font-weight: inherit; + font-style: inherit; +} + +/* + * Fix some issues with the theme and non-highlighted code + * samples. Without and highlighting styles attached the + * formatting is broken. 
+ * + * https://github.com/mkdocs/mkdocs/issues/319 + */ +.rst-content .no-highlight { + display: block; + padding: 0.5em; + color: #333; +} + + +/* + * Additions specific to the search functionality provided by MkDocs + */ + +.search-results { + margin-top: 23px; +} + +.search-results article { + border-top: 1px solid #E1E4E5; + padding-top: 24px; +} + +.search-results article:first-child { + border-top: none; +} + +form .search-query { + width: 100%; + border-radius: 50px; + padding: 6px 12px; /* csslint allow: box-model */ + border-color: #D1D4D5; +} + +/* + * Improve inline code blocks within admonitions. + * + * https://github.com/mkdocs/mkdocs/issues/656 + */ + .rst-content .admonition code { + color: #404040; + border: 1px solid #c7c9cb; + border: 1px solid rgba(0, 0, 0, 0.2); + background: #f8fbfd; + background: rgba(255, 255, 255, 0.7); +} + +/* + * Account for wide tables which go off the side. + * Override borders to avoid weirdness on narrow tables. + * + * https://github.com/mkdocs/mkdocs/issues/834 + * https://github.com/mkdocs/mkdocs/pull/1034 + */ +.rst-content .section .docutils { + width: 100%; + overflow: auto; + display: block; + border: none; +} + +td, th { + border: 1px solid #e1e4e5 !important; /* csslint allow: important */ + border-collapse: collapse; +} + +/* + * Without the following amendments, the navigation in the theme will be + * slightly cut off. This is due to the fact that the .wy-nav-side has a + * padding-bottom of 2em, which must not necessarily align with the font-size of + * 90 % on the .rst-current-version container, combined with the padding of 12px + * above and below. These amendments fix this in two steps: First, make sure the + * .rst-current-version container has a fixed height of 40px, achieved using + * line-height, and then applying a padding-bottom of 40px to this container. In + * a second step, the items within that container are re-aligned using flexbox. 
+ * + * https://github.com/mkdocs/mkdocs/issues/2012 + */ + .wy-nav-side { + padding-bottom: 40px; +} + +/* + * The second step of above amendment: Here we make sure the items are aligned + * correctly within the .rst-current-version container. Using flexbox, we + * achieve it in such a way that it will look like the following: + * + * [No repo_name] + * Next >> // On the first page + * << Previous Next >> // On all subsequent pages + * + * [With repo_name] + * Next >> // On the first page + * << Previous Next >> // On all subsequent pages + * + * https://github.com/mkdocs/mkdocs/issues/2012 + */ +.rst-versions .rst-current-version { + padding: 0 12px; + display: flex; + font-size: initial; + justify-content: space-between; + align-items: center; + line-height: 40px; +} + +/* + * Please note that this amendment also involves removing certain inline-styles + * from the file ./mkdocs/themes/readthedocs/versions.html. + * + * https://github.com/mkdocs/mkdocs/issues/2012 + */ +.rst-current-version span { + flex: 1; + text-align: center; +} diff --git a/gen_ref_pages.py b/gen_ref_pages.py new file mode 100644 index 00000000..41da191b --- /dev/null +++ b/gen_ref_pages.py @@ -0,0 +1,36 @@ +"""Generate the code reference pages and navigation. 
+ +Script was taken from +https://mkdocstrings.github.io/recipes/#automatic-code-reference-pages +""" + +from pathlib import Path + +import mkdocs_gen_files + +nav = mkdocs_gen_files.Nav() + +for path in sorted(Path(".").rglob("quinn/**/*.py")): + module_path = path.relative_to(".").with_suffix("") + doc_path = path.relative_to(".").with_suffix(".md") + full_doc_path = Path("reference", doc_path) + + parts = tuple(module_path.parts) + + if parts[-1] == "__init__": + parts = parts[:-1] + doc_path = doc_path.with_name("index.md") + full_doc_path = full_doc_path.with_name("index.md") + elif parts[-1] == "__main__": + continue + + nav[parts] = doc_path.as_posix() # + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + ident = ".".join(parts) + fd.write(f"::: {ident}") + + mkdocs_gen_files.set_edit_path(full_doc_path, path) + +with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file: + nav_file.writelines(nav.build_literate_nav()) diff --git a/img/favicon.ico b/img/favicon.ico new file mode 100644 index 00000000..e85006a3 Binary files /dev/null and b/img/favicon.ico differ diff --git a/index.html b/index.html new file mode 100644 index 00000000..a60dca73 --- /dev/null +++ b/index.html @@ -0,0 +1,480 @@ + + + + + + + + Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +

Quinn

+

![image](https://github.com/MrPowers/quinn/workflows/build/badge.svg) +![image](https://github.com/MrPowers/quinn/workflows/build/badge.svg) +PyPI - Downloads +PyPI version

+

Pyspark helper methods to maximize developer productivity.

+

Quinn provides DataFrame validation functions, useful column functions / DataFrame transformations, and performant helper functions.

+

quinn

+

Setup

+

Quinn is uploaded to PyPi and can be installed with this command:

+
pip install quinn
+
+

Quinn Helper Functions

+
import quinn
+
+

DataFrame Validations

+

validate_presence_of_columns()

+
quinn.validate_presence_of_columns(source_df, ["name", "age", "fun"])
+
+

Raises an exception unless source_df contains the name, age, and fun columns.

+

validate_schema()

+
quinn.validate_schema(source_df, required_schema)
+
+

Raises an exception unless source_df contains all the StructFields defined in the required_schema.

+

validate_absence_of_columns()

+
quinn.validate_absence_of_columns(source_df, ["age", "cool"])
+
+

Raises an exception if source_df contains age or cool columns.

+

Functions

+

single_space()

+
actual_df = source_df.withColumn(
+    "words_single_spaced",
+    quinn.single_space(col("words"))
+)
+
+

Replaces all multispaces with single spaces (e.g. changes "this has   some" to "this has some").

+

remove_all_whitespace()

+
actual_df = source_df.withColumn(
+    "words_without_whitespace",
+    quinn.remove_all_whitespace(col("words"))
+)
+
+

Removes all whitespace in a string (e.g. changes "this has some" to "thishassome").

+

anti_trim()

+
actual_df = source_df.withColumn(
+    "words_anti_trimmed",
+    quinn.anti_trim(col("words"))
+)
+
+

Removes all inner whitespace, but doesn't delete leading or trailing whitespace (e.g. changes " this has some " to " thishassome ").

+

remove_non_word_characters()

+
actual_df = source_df.withColumn(
+    "words_without_nonword_chars",
+    quinn.remove_non_word_characters(col("words"))
+)
+
+

Removes all non-word characters from a string (e.g. changes "si%$#@!#$!@#mpsons" to "simpsons").

+

multi_equals()

+
source_df.withColumn(
+    "are_s1_and_s2_cat",
+    quinn.multi_equals("cat")(col("s1"), col("s2"))
+)
+
+

multi_equals returns true if s1 and s2 are both equal to "cat".

+

approx_equal()

+

This function takes 3 arguments: two PySpark columns and a numeric threshold. It returns a Boolean column that tells whether the two columns are equal within the threshold.

+
let the columns be
+col1 = [1.2, 2.5, 3.1, 4.0, 5.5]
+col2 = [1.3, 2.3, 3.0, 3.9, 5.6]
+threshold = 0.2
+
+result = approx_equal(col("col1"), col("col2"), threshold)
+result.show()
+
++-----+
+|value|
++-----+
+| true|
+|false|
+| true|
+| true|
+| true|
++-----+
+
+

array_choice()

+

This function takes a Column as a parameter and returns a PySpark column that contains a random value from the input column parameter.

+
df = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], ["values"])
+result = df.select(array_choice(col("values")))
+
+The output is :=
++--------------+
+|array_choice()|
++--------------+
+|             2|
++--------------+
+
+
+

regexp_extract_all()

+

regexp_extract_all takes 2 parameters: a string s and regexp, which is a regular expression. This function finds all the matches in the string that satisfy the regular expression.

+
print(regexp_extract_all("this is a example text message for testing application",r"\b\w*a\w*\b"))
+
+The output is :=
+['a', 'example', 'message', 'application']
+
+
+

Here, the pattern r"\b\w*a\w*\b" matches words containing the letter a.

+

week_start_date()

+

It takes 2 parameters, column and week_start_day. It returns a Spark Dataframe column which contains the start date of the week. By default the week_start_day is set to "Sun".

+

For input ["2023-03-05", "2023-03-06", "2023-03-07", "2023-03-08"] the Output is

+
result = df.select("date", week_start_date(col("date"), "Sun"))
+result.show()
++----------+----------------+
+|      date|week_start_date |
++----------+----------------+
+|2023-03-05|      2023-03-05|
+|2023-03-07|      2023-03-05|
+|2023-03-08|      2023-03-05|
++----------+----------------+
+
+

week_end_date()

+

It also takes 2 parameters, column and week_end_day, and returns a DataFrame column which contains the end date of the week. By default the week_end_day is set to "sat".

+
+---------+-------------+
+      date|week_end_date|
++---------+-------------+
+2023-03-05|   2023-03-05|
+2023-03-07|   2023-03-12|
+2023-03-08|   2023-03-12|
++---------+-------------+
+
+
+

uuid5()

+

This function generates a UUIDv5 in string form from the passed column, with an optional namespace and an optional extra salt. +By default the namespace is the NAMESPACE_DNS UUID and no extra string is used to reduce hash collisions.

+

+df = spark.createDataFrame([("lorem",), ("ipsum",)], ["values"])
+result = df.select(quinn.uuid5(F.col("values")).alias("uuid5"))
+result.show(truncate=False)
+
+The output is :=
++------------------------------------+
+|uuid5                               |
++------------------------------------+
+|35482fda-c10a-5076-8da2-dc7bf22d6be4|
+|51b79c1d-d06c-5b30-a5c6-1fadcd3b2103|
++------------------------------------+
+
+
+

Transformations

+

snake_case_col_names()

+
quinn.snake_case_col_names(source_df)
+
+

Converts all the column names in a DataFrame to snake_case. It's annoying to write SQL queries when columns aren't snake cased.

+

sort_columns()

+
quinn.sort_columns(df=source_df, sort_order="asc", sort_nested=True)
+
+

Sorts the DataFrame columns in alphabetical order, including nested columns if sort_nested is set to True. Wide DataFrames are easier to navigate when they're sorted alphabetically.

+

DataFrame Helpers

+

column_to_list()

+
quinn.column_to_list(source_df, "name")
+
+

Converts a column in a DataFrame to a list of values.

+

two_columns_to_dictionary()

+
quinn.two_columns_to_dictionary(source_df, "name", "age")
+
+

Converts two columns of a DataFrame into a dictionary. In this example, name is the key and age is the value.

+

to_list_of_dictionaries()

+
quinn.to_list_of_dictionaries(source_df)
+
+

Converts an entire DataFrame into a list of dictionaries.

+

show_output_to_df()

+
quinn.show_output_to_df(output_str, spark)
+
+

Parses a spark DataFrame output string into a spark DataFrame. Useful for quickly pulling data from a log into a DataFrame. In this example, output_str is a string of the form:

+
+----+---+-----------+------+
+|name|age|     stuff1|stuff2|
++----+---+-----------+------+
+|jose|  1|nice person|  yoyo|
+|  li|  2|nice person|  yoyo|
+| liz|  3|nice person|  yoyo|
++----+---+-----------+------+
+
+

Schema Helpers

+

schema_from_csv()

+
quinn.schema_from_csv("schema.csv")
+
+

Converts a CSV file into a PySpark schema (aka StructType). The CSV must contain the column name and type. The nullable and metadata columns are optional.

+

Here's an example CSV file:

+
name,type
+person,string
+address,string
+phoneNumber,string
+age,int
+
+

Here's how to convert that CSV file to a PySpark schema:

+
schema = schema_from_csv(spark, "some_file.csv")
+
+StructType([
+    StructField("person", StringType(), True),
+    StructField("address", StringType(), True),
+    StructField("phoneNumber", StringType(), True),
+    StructField("age", IntegerType(), True),
+])
+
+

Here's a more complex CSV file:

+
name,type,nullable,metadata
+person,string,false,{"description":"The person's name"}
+address,string
+phoneNumber,string,TRUE,{"description":"The person's phone number"}
+age,int,False
+
+

Here's how to read this CSV file into a PySpark schema:

+
another_schema = schema_from_csv(spark, "some_file.csv")
+
+StructType([
+    StructField("person", StringType(), False, {"description": "The person's name"}),
+    StructField("address", StringType(), True),
+    StructField("phoneNumber", StringType(), True, {"description": "The person's phone number"}),
+    StructField("age", IntegerType(), False),
+])
+
+

print_schema_as_code()

+
fields = [
+    StructField("simple_int", IntegerType()),
+    StructField("decimal_with_nums", DecimalType(19, 8)),
+    StructField("array", ArrayType(FloatType()))
+]
+schema = StructType(fields)
+printable_schema: str = quinn.print_schema_as_code(schema)
+
+

Converts a Spark DataType to a string of Python code that can be evaluated as code using eval(). If the DataType is a StructType, this can be used to print an existing schema in a format that can be copy-pasted into a Python script, log to a file, etc.

+

For example:

+
print(printable_schema)
+
+
StructType(
+    fields=[
+        StructField("simple_int", IntegerType(), True),
+        StructField("decimal_with_nums", DecimalType(19, 8), True),
+        StructField(
+            "array",
+            ArrayType(FloatType()),
+            True,
+        ),
+    ]
+)
+
+

Once evaluated, the printable schema is a valid schema that can be used in dataframe creation, validation, etc.

+
from chispa.schema_comparer import assert_basic_schema_equality
+
+parsed_schema = eval(printable_schema)
+assert_basic_schema_equality(parsed_schema, schema) # passes
+
+

print_schema_as_code() can also be used to print other DataType objects.

+

ArrayType

+
array_type = ArrayType(FloatType())
+printable_type: str = quinn.print_schema_as_code(array_type)
+print(printable_type)
+ ```
+
+ ```
+ArrayType(FloatType())
+ ```
+
+`MapType`
+```python
+map_type = MapType(StringType(), FloatType())
+printable_type: str = quinn.print_schema_as_code(map_type)
+print(printable_type)
+ ```
+
+ ```
+MapType(
+        StringType(),
+        FloatType(),
+        True,
+)
+ ```
+
+`IntegerType`, `StringType` etc.
+```python
+integer_type = IntegerType()
+printable_type: str = quinn.print_schema_as_code(integer_type)
+print(printable_type)
+ ```
+
+ ```
+IntegerType()
+ ```
+
+## Pyspark Core Class Extensions
+
+
+

from quinn.extensions import *

+

+### Column Extensions
+
+**isFalsy()**
+
+```python
+source_df.withColumn("is_stuff_falsy", F.col("has_stuff").isFalsy())
+
+

Returns True if has_stuff is None or False.

+

isTruthy()

+
source_df.withColumn("is_stuff_truthy", F.col("has_stuff").isTruthy())
+
+

Returns True unless has_stuff is None or False.

+

isNullOrBlank()

+
source_df.withColumn("is_blah_null_or_blank", F.col("blah").isNullOrBlank())
+
+

Returns True if blah is null or blank (the empty string or a string that only contains whitespace).

+

isNotIn()

+
source_df.withColumn("is_not_bobs_hobby", F.col("fun_thing").isNotIn(bobs_hobbies))
+
+

Returns True if fun_thing is not included in the bobs_hobbies list.

+

nullBetween()

+
source_df.withColumn("is_between", F.col("age").nullBetween(F.col("lower_age"), F.col("upper_age")))
+
+

Returns True if age is between lower_age and upper_age. If lower_age is populated and upper_age is null, it will return True if age is greater than or equal to lower_age. If lower_age is null and upper_age is populated, it will return True if age is lower than or equal to upper_age.

+

Contributing

+

We are actively looking for feature requests, pull requests, and bug fixes.

+

Any developer that demonstrates excellence will be invited to be a maintainer of the project.

+

Code Style

+

We are using PySpark code-style and Sphinx as the docstring format. For more details about the Sphinx format, see this tutorial. A short example of a Sphinx-formatted docstring is shown below:

+
"""[Summary]
+
+:param [ParamName]: [ParamDescription], defaults to [DefaultParamVal]
+:type [ParamName]: [ParamType](, optional)
+...
+:raises [ErrorType]: [ErrorDescription]
+...
+:return: [ReturnDescription]
+:rtype: [ReturnType]
+"""
+
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + Next » + + +
+ + + + + + + + + + + diff --git a/js/html5shiv.min.js b/js/html5shiv.min.js new file mode 100644 index 00000000..1a01c94b --- /dev/null +++ b/js/html5shiv.min.js @@ -0,0 +1,4 @@ +/** +* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var 
k,l,m="3.7.3",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); diff --git a/js/jquery-3.6.0.min.js b/js/jquery-3.6.0.min.js new file mode 100644 index 00000000..c4c6022f --- /dev/null +++ b/js/jquery-3.6.0.min.js @@ -0,0 +1,2 @@ +/*! 
jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType&&"function"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.6.0",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new 
RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function 
fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return 
e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var 
t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," 
":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return 
e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t + + + + + + + API Docs - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+ + +
+
+ +
+ +
+ +
+ + + + « Previous + + + +
+ + + + + + + + + diff --git a/reference/quinn/append_if_schema_identical/index.html b/reference/quinn/append_if_schema_identical/index.html new file mode 100644 index 00000000..e15b10a6 --- /dev/null +++ b/reference/quinn/append_if_schema_identical/index.html @@ -0,0 +1,274 @@ + + + + + + + + Append if schema identical - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + +
+ + + +

+ SchemaMismatchError + + +

+ + +
+

+ Bases: ValueError

+ + +

raise this when there's a schema mismatch between source & target schema.

+ + +
+ Source code in quinn/append_if_schema_identical.py +
class SchemaMismatchError(ValueError):
+    """Raise this when there's a schema mismatch between source & target schema."""
+
+ +
+ +
+ + +
+ + + +

+append_if_schema_identical(source_df, target_df) + +

+ + +
+ +

Compare the schema of source & target dataframe.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
source_df + DataFrame +

Input DataFrame

+ required +
target_df + DataFrame +

Input DataFrame

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.DataFrame +

dataframe

+ +
+ Source code in quinn/append_if_schema_identical.py +
def append_if_schema_identical(source_df: DataFrame, target_df: DataFrame) -> DataFrame:
+    """Compare the schema of source & target dataframe.
+
+    :param source_df: Input DataFrame
+    :type source_df: pyspark.sql.DataFrame
+    :param target_df: Input DataFrame
+    :type target_df: pyspark.sql.DataFrame
+    :return: dataframe
+    :rtype: pyspark.sql.DataFrame
+    """
+    # Retrieve the schemas of the source and target dataframes
+    source_schema = source_df.schema
+    target_schema = target_df.schema
+
+    # Convert the schemas to a list of tuples
+    source_schema_list = [(field.name, str(field.dataType)) for field in source_schema]
+    target_schema_list = [(field.name, str(field.dataType)) for field in target_schema]
+
+    unmatched_cols = [
+        col for col in source_schema_list if col not in target_schema_list
+    ]
+    error_message = (
+        f"The schemas of the source and target dataframes are not identical."
+        f"From source schema column {unmatched_cols} is missing in target schema"
+    )
+    # Check if the column names in the source and target schemas are the same, regardless of their order
+    if set(source_schema.fieldNames()) != set(target_schema.fieldNames()):
+        raise SchemaMismatchError(error_message)
+    # Check if the column names and data types in the source and target schemas are the same, in the same order
+    if sorted(source_schema_list) != sorted(target_schema_list):
+        raise SchemaMismatchError(error_message)
+
+    # Append the dataframes if the schemas are identical
+    return target_df.unionByName(source_df)
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/dataframe_helpers/index.html b/reference/quinn/dataframe_helpers/index.html new file mode 100644 index 00000000..d6a86b18 --- /dev/null +++ b/reference/quinn/dataframe_helpers/index.html @@ -0,0 +1,694 @@ + + + + + + + + Dataframe helpers - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+column_to_list(df, col_name) + +

+ + +
+ +

Collect column to list of values.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

Input DataFrame

+ required +
col_name + str +

Column to collect

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ List[Any] +

List of values

+ +
+ Source code in quinn/dataframe_helpers.py +
def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
+    """Collect column to list of values.
+
+    :param df: Input DataFrame
+    :type df: pyspark.sql.DataFrame
+    :param col_name: Column to collect
+    :type col_name: str
+    :return: List of values
+    :rtype: List[Any]
+    """
+    return [x[col_name] for x in df.select(col_name).collect()]
+
+
+ +
+ +
+ + + +

+create_df(spark, rows_data, col_specs) + +

+ + +
+ +

Create a new DataFrame from the given data and column specs.

+

The returned DataFrame is created using the StructType and StructField classes provided by PySpark.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
spark + SparkSession +

SparkSession object

+ required +
rows_data + array-like +

the data used to create the DataFrame

+ required +
col_specs + list of tuples +

list of tuples containing the name and type of the field

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

a new DataFrame

+ +
+ Source code in quinn/dataframe_helpers.py +
def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame:  # noqa: ANN001
+    """Create a new DataFrame from the given data and column specs.
+
+    The returned DataFrame is created using the StructType and StructField classes provided by PySpark.
+
+    :param spark: SparkSession object
+    :type spark: SparkSession
+    :param rows_data: the data used to create the DataFrame
+    :type rows_data: array-like
+    :param col_specs: list of tuples containing the name and type of the field
+    :type col_specs: list of tuples
+    :return: a new DataFrame
+    :rtype: DataFrame
+    """
+    struct_fields = list(map(lambda x: StructField(*x), col_specs))  # noqa: C417
+    return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields))
+
+
+ +
+ +
+ + + +

+print_athena_create_table(df, athena_table_name, s3location) + +

+ + +
+ +

Generate the Athena create table statement for a given DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The pyspark.sql.DataFrame to use

+ required +
athena_table_name + str +

The name of the athena table to generate

+ required +
s3location + str +

The S3 location of the parquet data

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ None +

None

+ +
+ Source code in quinn/dataframe_helpers.py +
def print_athena_create_table(
+    df: DataFrame,
+    athena_table_name: str,
+    s3location: str,
+) -> None:
+    """Generate the Athena create table statement for a given DataFrame.
+
+    :param df: The pyspark.sql.DataFrame to use
+    :param athena_table_name: The name of the athena table to generate
+    :param s3location: The S3 location of the parquet data
+    :return: None
+    """
+    fields = df.schema
+
+    print(f"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( ")
+
+    for field in fields.fieldNames()[:-1]:
+        print("\t", f"`{fields[field].name}` {fields[field].dataType.simpleString()}, ")
+    last = fields[fields.fieldNames()[-1]]
+    print("\t", f"`{last.name}` {last.dataType.simpleString()} ")
+
+    print(")")
+    print("STORED AS PARQUET")
+    print(f"LOCATION '{s3location}'\n")
+
+
+ +
+ +
+ + + +

+show_output_to_df(show_output, spark) + +

+ + +
+ +

Show output as spark DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
show_output + str +

String representing output of 'show' command in spark

+ required +
spark + SparkSession +

SparkSession object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Dataframe +

DataFrame object containing output of a show command in spark

+ +
+ Source code in quinn/dataframe_helpers.py +
def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame:
+    """Show output as spark DataFrame.
+
+    :param show_output: String representing output of 'show' command in spark
+    :type show_output: str
+    :param spark: SparkSession object
+    :type spark: SparkSession
+    :return: DataFrame object containing output of a show command in spark
+    :rtype: DataFrame
+    """
+    lines = show_output.split("\n")
+    ugly_column_names = lines[1]
+    pretty_column_names = [i.strip() for i in ugly_column_names[1:-1].split("|")]
+    pretty_data = []
+    ugly_data = lines[3:-1]
+    for row in ugly_data:
+        r = [i.strip() for i in row[1:-1].split("|")]
+        pretty_data.append(tuple(r))
+    return spark.createDataFrame(pretty_data, pretty_column_names)
+
+
+ +
+ +
+ + + +

+to_list_of_dictionaries(df) + +

+ + +
+ +

Convert a Spark DataFrame to a list of dictionaries.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The Spark DataFrame to convert.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ List[Dict[str, Any]] +

A list of dictionaries representing the rows in the DataFrame.

+ +
+ Source code in quinn/dataframe_helpers.py +
def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]:
+    """Convert a Spark DataFrame to a list of dictionaries.
+
+    :param df: The Spark DataFrame to convert.
+    :type df: :py:class:`pyspark.sql.DataFrame`
+    :return: A list of dictionaries representing the rows in the DataFrame.
+    :rtype: List[Dict[str, Any]]
+    """
+    return list(map(lambda r: r.asDict(), df.collect()))  # noqa: C417
+
+
+ +
+ +
+ + + +

+two_columns_to_dictionary(df, key_col_name, value_col_name) + +

+ + +
+ +

Collect two columns as a dictionary where the first column is the key and the second is the value.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

Input DataFrame

+ required +
key_col_name + str +

Key-column

+ required +
value_col_name + str +

Value-column

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Dict[str, Any] +

Dictionary with values

+ +
+ Source code in quinn/dataframe_helpers.py +
def two_columns_to_dictionary(
+    df: DataFrame,
+    key_col_name: str,
+    value_col_name: str,
+) -> dict[str, Any]:
+    """Collect two columns as dictionary when first column is key and second is value.
+
+    :param df: Input DataFrame
+    :type df: pyspark.sql.DataFrame
+    :param key_col_name: Key-column
+    :type key_col_name: str
+    :param value_col_name: Value-column
+    :type value_col_name: str
+    :return: Dictionary with values
+    :rtype: Dict[str, Any]
+    """
+    k, v = key_col_name, value_col_name
+    return {x[k]: x[v] for x in df.select(k, v).collect()}
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/dataframe_validator/index.html b/reference/quinn/dataframe_validator/index.html new file mode 100644 index 00000000..f348dfe9 --- /dev/null +++ b/reference/quinn/dataframe_validator/index.html @@ -0,0 +1,534 @@ + + + + + + + + Dataframe validator - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + +
+ + + +

+ DataFrameMissingColumnError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when there's a DataFrame column error.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameMissingColumnError(ValueError):
+    """Raise this when there's a DataFrame column error."""
+
+ +
+ +
+ +
+ + + +

+ DataFrameMissingStructFieldError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when a DataFrame is missing required StructFields.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameMissingStructFieldError(ValueError):
+    """Raise this when a DataFrame is missing required StructFields."""
+
+ +
+ +
+ +
+ + + +

+ DataFrameProhibitedColumnError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when a DataFrame includes prohibited columns.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameProhibitedColumnError(ValueError):
+    """Raise this when a DataFrame includes prohibited columns."""
+
+ +
+ +
+ + +
+ + + +

+validate_absence_of_columns(df, prohibited_col_names) + +

+ + +
+ +

Validate that none of the prohibited column names are present among specified DataFrame columns.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

DataFrame containing columns to be checked.

+ required +
prohibited_col_names + list[str] +

List of prohibited column names.

+ required +
+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameProhibitedColumnError +

If the prohibited column names are present among the specified DataFrame columns.

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None:
+    """Validate that none of the prohibited column names are present among specified DataFrame columns.
+
+    :param df: DataFrame containing columns to be checked.
+    :param prohibited_col_names: List of prohibited column names.
+    :raises DataFrameProhibitedColumnError: If the prohibited column names are
+    present among the specified DataFrame columns.
+    """
+    all_col_names = df.columns
+    extra_col_names = [x for x in all_col_names if x in prohibited_col_names]
+    error_message = f"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}"
+    if extra_col_names:
+        raise DataFrameProhibitedColumnError(error_message)
+
+
+ +
+ +
+ + + +

+validate_presence_of_columns(df, required_col_names) + +

+ + +
+ +

Validate the presence of column names in a DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A spark DataFrame.

+ required +
required_col_names + list[str] +

List of the required column names for the DataFrame.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ None +

None.

+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameMissingColumnError +

if any of the requested column names are not present in the DataFrame.

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None:
+    """Validate the presence of column names in a DataFrame.
+
+    :param df: A spark DataFrame.
+    :type df: DataFrame
+    :param required_col_names: List of the required column names for the DataFrame.
+    :type required_col_names: :py:class:`list` of :py:class:`str`
+    :return: None.
+    :raises DataFrameMissingColumnError: if any of the requested column names are
+    not present in the DataFrame.
+    """
+    all_col_names = df.columns
+    missing_col_names = [x for x in required_col_names if x not in all_col_names]
+    error_message = f"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}"
+    if missing_col_names:
+        raise DataFrameMissingColumnError(error_message)
+
+
+ +
+ +
+ + + +

+validate_schema(df, required_schema, ignore_nullable=False) + +

+ + +
+ +

Function that validates whether a given DataFrame has a given StructType as its schema.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

DataFrame to validate

+ required +
required_schema + StructType +

StructType required for the DataFrame

+ required +
ignore_nullable + bool +

(Optional) A flag for if nullable fields should be ignored during validation

+ False +
+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameMissingStructFieldError +

if any StructFields from the required schema are not included in the DataFrame schema

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_schema(
+    df: DataFrame,
+    required_schema: StructType,
+    ignore_nullable: bool = False,
+) -> None:
+    """Function that validates whether a given DataFrame has a given StructType as its schema.
+
+    :param df: DataFrame to validate
+    :type df: DataFrame
+    :param required_schema: StructType required for the DataFrame
+    :type required_schema: StructType
+    :param ignore_nullable: (Optional) A flag for if nullable fields should be
+    ignored during validation
+    :type ignore_nullable: bool, optional
+
+    :raises DataFrameMissingStructFieldError: if any StructFields from the required
+    schema are not included in the DataFrame schema
+    """
+    _all_struct_fields = copy.deepcopy(df.schema)
+    _required_schema = copy.deepcopy(required_schema)
+
+    if ignore_nullable:
+        for x in _all_struct_fields:
+            x.nullable = None
+
+        for x in _required_schema:
+            x.nullable = None
+
+    missing_struct_fields = [x for x in _required_schema if x not in _all_struct_fields]
+    error_message = f"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {_all_struct_fields}"
+
+    if missing_struct_fields:
+        raise DataFrameMissingStructFieldError(error_message)
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/extensions/column_ext/index.html b/reference/quinn/extensions/column_ext/index.html new file mode 100644 index 00000000..57ff8768 --- /dev/null +++ b/reference/quinn/extensions/column_ext/index.html @@ -0,0 +1,627 @@ + + + + + + + + Column ext - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+isFalse(self) + +

+ + +
+ +

Function checks if the column is equal to False and returns the column.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

Column

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

Column

+ +
+ Source code in quinn/extensions/column_ext.py +
def isFalse(self: Column) -> Column:
+    """Function checks if the column is equal to False and returns the column.
+
+    :param self: Column
+    :return: Column
+    :rtype: Column
+    """
+    return self == lit(False)
+
+
+ +
+ +
+ + + +

+isFalsy(self) + +

+ + +
+ +

Returns a Column indicating whether all values in the Column are False or NULL (falsy).

+

Each element in the resulting column is True if all the elements in the +Column are either NULL or False, or False otherwise. This is accomplished by +performing a bitwise or of the isNull condition and a literal False value and +then wrapping the result in a when statement.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

Column object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

Column object

+ +
+ Source code in quinn/extensions/column_ext.py +
def isFalsy(self: Column) -> Column:
+    """Returns a Column indicating whether all values in the Column are False or NULL (**falsy**).
+
+    Each element in the resulting column is True if all the elements in the
+    Column are either NULL or False, or False otherwise. This is accomplished by
+    performing a bitwise or of the ``isNull`` condition and a literal False value and
+    then wrapping the result in a **when** statement.
+
+    :param self: Column object
+    :returns: Column object
+    :rtype: Column
+    """
+    return when(self.isNull() | (self == lit(False)), True).otherwise(False)
+
+
+ +
+ +
+ + + +

+isNotIn(self, _list) + +

+ + +
+ +

To see if a value is not in a list of values.

+

:_list: list[Any]

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

Column object

+ required +
+ +
+ Source code in quinn/extensions/column_ext.py +
def isNotIn(self: Column, _list: list[Any]) -> Column:
+    """To see if a value is not in a list of values.
+
+    :param self: Column object
+    :_list: list[Any]
+    :rtype: Column
+    """
+    return ~(self.isin(_list))
+
+
+ +
+ +
+ + + +

+isNullOrBlank(self) + +

+ + +
+ +

Returns a Boolean value which expresses whether a given column is null or contains only blank characters.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
\*\*self +

The :class:Column to check.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column containing True if the column is null or only contains blank characters, or False otherwise.

+ +
+ Source code in quinn/extensions/column_ext.py +
def isNullOrBlank(self: Column) -> Column:
+    r"""Returns a Boolean value which expresses whether a given column is ``null`` or contains only blank characters.
+
+    :param \*\*self: The  :class:`Column` to check.
+
+    :returns: A `Column` containing ``True`` if the column is ``null`` or only contains
+    blank characters, or ``False`` otherwise.
+    :rtype: Column
+    """
+    return (self.isNull()) | (trim(self) == "")
+
+
+ +
+ +
+ + + +

+isTrue(self) + +

+ + +
+ +

Function takes a column of type Column as an argument and returns a column of type Column.

+

It evaluates whether each element in the column argument is equal to True, and +if so will return True, otherwise False.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

Column object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

Column object

+ +
+ Source code in quinn/extensions/column_ext.py +
def isTrue(self: Column) -> Column:
+    """Function takes a column of type Column as an argument and returns a column of type Column.
+
+    It evaluates whether each element in the column argument is equal to True, and
+    if so will return True, otherwise False.
+
+    :param self: Column object
+    :returns: Column object
+    :rtype: Column
+    """
+    return self == lit(True)
+
+
+ +
+ +
+ + + +

+isTruthy(self) + +

+ + +
+ +

Calculates a boolean expression that is the opposite of isFalsy for the given Column self.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

The Column to calculate the opposite of isFalsy for.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column with the results of the calculation.

+ +
+ Source code in quinn/extensions/column_ext.py +
def isTruthy(self: Column) -> Column:
+    """Calculates a boolean expression that is the opposite of isFalsy for the given ``Column`` self.
+
+    :param Column self: The ``Column`` to calculate the opposite of isFalsy for.
+    :returns: A ``Column`` with the results of the calculation.
+    :rtype: Column
+    """
+    return ~(self.isFalsy())
+
+
+ +
+ +
+ + + +

+nullBetween(self, lower, upper) + +

+ + +
+ +

To see if a value is between two values in a null friendly way.

+

:lower: Column +:upper: Column

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
self + Column +

Column object

+ required +
+ +
+ Source code in quinn/extensions/column_ext.py +
def nullBetween(self: Column, lower: Column, upper: Column) -> Column:
+    """To see if a value is between two values in a null friendly way.
+
+    :param self: Column object
+    :lower: Column
+    :upper: Column
+    :rtype: Column
+    """
+    return when(lower.isNull() & upper.isNull(), False).otherwise(
+        when(self.isNull(), False).otherwise(
+            when(lower.isNull() & upper.isNotNull() & (self <= upper), True).otherwise(
+                when(
+                    lower.isNotNull() & upper.isNull() & (self >= lower),
+                    True,
+                ).otherwise(self.between(lower, upper)),
+            ),
+        ),
+    )
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/extensions/dataframe_ext/index.html b/reference/quinn/extensions/dataframe_ext/index.html new file mode 100644 index 00000000..77fda371 --- /dev/null +++ b/reference/quinn/extensions/dataframe_ext/index.html @@ -0,0 +1,138 @@ + + + + + + + + Dataframe ext - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/extensions/index.html b/reference/quinn/extensions/index.html new file mode 100644 index 00000000..8068c687 --- /dev/null +++ b/reference/quinn/extensions/index.html @@ -0,0 +1,140 @@ + + + + + + + + Index - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ +

Extensions API.

+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/extensions/spark_session_ext/index.html b/reference/quinn/extensions/spark_session_ext/index.html new file mode 100644 index 00000000..f146c344 --- /dev/null +++ b/reference/quinn/extensions/spark_session_ext/index.html @@ -0,0 +1,232 @@ + + + + + + + + Spark session ext - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+create_df(self, rows_data, col_specs) + +

+ + +
+ +

Creates a new DataFrame from the given data and column specs.

+

The returned DataFrame is created using the StructType and StructField classes provided by PySpark.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
rows_data + array-like +

the data used to create the DataFrame

+ required +
col_specs + list[tuple] +

list of tuples containing the name and type of the field

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

a new DataFrame

+ +
+ Source code in quinn/extensions/spark_session_ext.py +
def create_df(self: Self, rows_data, col_specs: list[tuple]) -> DataFrame:  # noqa: ANN001
+    """Creates a new DataFrame from the given data and column specs.
+
+    The returned DataFrame is created using the StructType and StructField classes provided by PySpark.
+
+    :param rows_data: the data used to create the DataFrame
+    :type rows_data: array-like
+    :param col_specs: list of tuples containing the name and type of the field
+    :type col_specs: list of tuples
+    :return: a new DataFrame
+    :rtype: DataFrame
+    """
+    warnings.warn(
+        "Extensions may be removed in the future versions of quinn. Please use `quinn.create_df()` instead",
+        category=DeprecationWarning,
+        stacklevel=2,
+    )
+
+    struct_fields = [StructField(*x) for x in col_specs]
+    return self.createDataFrame(data=rows_data, schema=StructType(struct_fields))
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/functions/index.html b/reference/quinn/functions/index.html new file mode 100644 index 00000000..ba5feb23 --- /dev/null +++ b/reference/quinn/functions/index.html @@ -0,0 +1,1346 @@ + + + + + + + + Functions - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+anti_trim(col) + +

+ + +
+ +

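Remove whitespace that sits between two word boundaries of col (i.e. whitespace between word characters) using the regexp_replace function; leading and trailing whitespace is not matched.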

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

Column on which to perform the regexp_replace.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A new Column with all whitespace removed from the boundaries.

+ +
+ Source code in quinn/functions.py +
def anti_trim(col: Column) -> Column:
+    """Remove whitespace from the boundaries of ``col`` using the regexp_replace function.
+
+    :param col: Column on which to perform the regexp_replace.
+    :type col: Column
+    :return: A new Column with all whitespace removed from the boundaries.
+    :rtype: Column
+    """
+    return F.regexp_replace(col, "\\b\\s+\\b", "")
+
+
+ +
+ +
+ + + +

+approx_equal(col1, col2, threshold) + +

+ + +
+ +

Compare two Column objects by checking if the difference between them is less than a specified threshold.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col1 + Column +

the first Column

+ required +
col2 + Column +

the second Column

+ required +
threshold + Number +

value to compare with

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

Boolean Column with True indicating that abs(col1 - col2) is less than threshold

+ +
+ Source code in quinn/functions.py +
def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column:
+    """Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``.
+
+    :param col1: the first ``Column``
+    :type col1: Column
+    :param col2: the second ``Column``
+    :type col2: Column
+    :param threshold: value to compare with
+    :type threshold: Number
+    :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 -
+    col2)`` is less than ``threshold``
+    """
+    return F.abs(col1 - col2) < threshold
+
+
+ +
+ +
+ + + +

+array_choice(col, seed=None) + +

+ + +
+ +

Returns one random element from the given column.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

Column from which element is chosen

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

random element from the given column

+ +
+ Source code in quinn/functions.py +
def array_choice(col: Column, seed: int | None = None) -> Column:
+    """Returns one random element from the given column.
+
+    :param col: Column from which element is chosen
+    :type col: Column
+    :return: random element from the given column
+    :rtype: Column
+    """
+    index = (F.rand(seed) * F.size(col)).cast("int")
+    return col[index]
+
+
+ +
+ +
+ + + +

+business_days_between(start_date, end_date) + +

+ + +
+ +

Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
start_date + Column +

The column with the start dates

+ required +
end_date + Column +

The column with the end dates

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

a Column with the number of business days between the start and the end date

+ +
+ Source code in quinn/functions.py +
def business_days_between(start_date: Column, end_date: Column) -> Column:  # noqa: ARG001
+    """Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date.
+
+    :param start_date: The column with the start dates
+    :type start_date: Column
+    :param end_date: The column with the end dates
+    :type end_date: Column
+    :returns: a Column with the number of business days between the start and the end date
+    :rtype: Column
+    """
+    all_days = "sequence(start_date, end_date)"
+    days_of_week = f"transform({all_days}, x -> date_format(x, 'E'))"
+    filter_weekends = F.expr(f"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))")
+    num_business_days = F.size(filter_weekends) - 1
+
+    return F.when(num_business_days < 0, None).otherwise(num_business_days)
+
+
+ +
+ +
+ + + +

+exists(f) + +

+ + +
+ +

Create a user-defined function.

+

It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating +whether any element in the list is true according to the argument f of the exists() function.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
f + Callable[[Any], bool] +

Callable function - A callable function that takes an element of type Any and returns a boolean value.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function.

+ +
+ Source code in quinn/functions.py +
def exists(f: Callable[[Any], bool]) -> udf:
+    """Create a user-defined function.
+
+    It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating
+    whether any element in the list is true according to the argument ``f`` of the ``exists()`` function.
+
+    :param f: Callable function - A callable function that takes an element of
+    type Any and returns a boolean value.
+    :return: A user-defined function that takes
+    a list expressed as a column of type ArrayType(AnyType) as an argument and
+    returns a boolean value indicating whether any element in the list is true
+    according to the argument ``f`` of the ``exists()`` function.
+    :rtype: UserDefinedFunction
+    """
+
+    def temp_udf(list_: list) -> bool:
+        return any(map(f, list_))
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+forall(f) + +

+ + +
+ +

The forall function allows for mapping a given boolean function to a list of arguments and returns a single boolean value.

+

It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to +each element of the list and returning a single boolean value if all the elements pass through the given boolean function.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
f + Callable[[Any], bool] +

A callable function f which takes in any type and returns a boolean

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise.

+ +
+ Source code in quinn/functions.py +
def forall(f: Callable[[Any], bool]) -> udf:
+    """The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value.
+
+    It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to
+    each element of the list and returning a single boolean value if all the elements pass through the given boolean function.
+
+    :param f: A callable function ``f`` which takes in any type and returns a boolean
+    :return: A spark UDF which accepts a list of arguments and returns True if all
+    elements pass through the given boolean function, False otherwise.
+    :rtype: UserDefinedFunction
+    """
+
+    def temp_udf(list_: list) -> bool:
+        return all(map(f, list_))
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+multi_equals(value) + +

+ + +
+ +

Create a user-defined function that checks if all the given columns have the designated value.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
value + Any +

The designated value.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A user-defined function of type BooleanType().

+ +
+ Source code in quinn/functions.py +
def multi_equals(value: Any) -> udf:  # noqa: ANN401
+    """Create a user-defined function that checks if all the given columns have the designated value.
+
+    :param value: The designated value.
+    :type value: Any
+    :return: A user-defined function of type BooleanType().
+    :rtype: UserDifinedFunction
+    """
+
+    def temp_udf(*cols) -> bool:  # noqa: ANN002
+        return all(map(lambda col: col == value, cols))  # noqa: C417
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+regexp_extract_all(s, regexp) + +

+ + +
+ +

Function uses the Python re library to extract regular expressions from a string (s) using a regex pattern (regexp).

+

It returns a list of all matches, or None if s is None.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
s + Column +

input string (Column)

+ required +
regexp + Column +

string re pattern

+ required +
+ +
+ Source code in quinn/functions.py +
@F.udf(returnType=ArrayType(StringType()))
+def regexp_extract_all(s: Column, regexp: Column) -> Column:
+    """Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`).
+
+    It returns a list of all matches, or    `None` if `s` is `None`.
+
+    :param s: input string (`Column`)
+    :type s: str
+    :param regexp: string `re` pattern
+    :rtype: Column
+    """
+    return None if s is None else re.findall(regexp, s)
+
+
+ +
+ +
+ + + +

+remove_all_whitespace(col) + +

+ + +
+ +

Function takes a Column object as a parameter and returns a Column object with all white space removed.

+

It does this using the regexp_replace function from F, which replaces all whitespace with an empty string.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

a Column object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

a Column object with all white space removed

+ +
+ Source code in quinn/functions.py +
def remove_all_whitespace(col: Column) -> Column:
+    """Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed.
+
+    It does this using the regexp_replace function from F, which replaces all whitespace with an empty string.
+
+    :param col: a `Column` object
+    :type col: Column
+    :returns: a `Column` object with all white space removed
+    :rtype: Column
+    """
+    return F.regexp_replace(col, "\\s+", "")
+
+
+ +
+ +
+ + + +

+remove_non_word_characters(col) + +

+ + +
+ +

Removes non-word characters from a column.

+

The non-word characters which will be removed are those identified by the +regular expression "[^\\w\\s]+". This expression represents any character +that is not a word character (e.g. \\w) or whitespace (\\s).

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

A Column object.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column object with non-word characters removed.

+ +
+ Source code in quinn/functions.py +
def remove_non_word_characters(col: Column) -> Column:
+    r"""Removes non-word characters from a column.
+
+    The non-word characters which will be removed are those identified by the
+    regular expression ``"[^\\w\\s]+"``.  This expression represents any character
+    that is not a word character (e.g. `\\w`) or whitespace (`\\s`).
+
+    :param col: A Column object.
+    :return: A Column object with non-word characters removed.
+
+    """
+    return F.regexp_replace(col, "[^\\w\\s]+", "")
+
+
+ +
+ +
+ + + +

+single_space(col) + +

+ + +
+ +

Function takes a column and replaces all the multiple white spaces with a single space.

+

It then trims the column to make all the texts consistent.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The column which needs to be spaced

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A trimmed column with single space

+ +
+ Source code in quinn/functions.py +
def single_space(col: Column) -> Column:
+    """Function takes a column and replaces all the multiple white spaces with a single space.
+
+    It then trims the column to make all the texts consistent.
+
+    :param col: The column which needs to be spaced
+    :type col: Column
+    :returns: A trimmed column with single space
+    :rtype: Column
+    """
+    return F.trim(F.regexp_replace(col, " +", " "))
+
+
+ +
+ +
+ + + +

+uuid5(col, namespace=uuid.NAMESPACE_DNS, extra_string='') + +

+ + +
+ +

Function generates UUIDv5 from col and namespace, optionally prepending an extra string to col.

+

Sets variant to RFC 4122 one.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

Column that will be hashed.

+ required +
namespace + uuid.UUID +

Namespace to be used. (default: uuid.NAMESPACE_DNS)

+ uuid.NAMESPACE_DNS +
extra_string + str +

In case of collisions one can pass an extra string to hash on.

+ '' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

String representation of generated UUIDv5

+ +
+ Source code in quinn/functions.py +
def uuid5(
+    col: Column,
+    namespace: uuid.UUID = uuid.NAMESPACE_DNS,
+    extra_string: str = "",
+) -> Column:
+    """Function generates UUIDv5 from ``col`` and ``namespace``, optionally prepending an extra string to ``col``.
+
+    Sets variant to RFC 4122 one.
+
+    :param col: Column that will be hashed.
+    :type col: Column
+    :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`)
+    :type namespace: str
+    :param extra_string: In case of collisions one can pass an extra string to hash on.
+    :type extra_string: str
+    :return: String representation of generated UUIDv5
+    :rtype: Column
+    """
+    ns = F.lit(namespace.bytes)
+    salted_col = F.concat(F.lit(extra_string), col)
+    encoded = F.encode(salted_col, "utf-8")
+    encoded_with_ns = F.concat(ns, encoded)
+    hashed = F.sha1(encoded_with_ns)
+    variant_part = F.substring(hashed, 17, 4)
+    variant_part = F.conv(variant_part, 16, 2)
+    variant_part = F.lpad(variant_part, 16, "0")
+    variant_part = F.concat(
+        F.lit("10"),
+        F.substring(variant_part, 3, 16),
+    )  # RFC 4122 variant.
+    variant_part = F.lower(F.conv(variant_part, 2, 16))
+    return F.concat_ws(
+        "-",
+        F.substring(hashed, 1, 8),
+        F.substring(hashed, 9, 4),
+        F.concat(F.lit("5"), F.substring(hashed, 14, 3)),  # Set version.
+        variant_part,
+        F.substring(hashed, 21, 12),
+    )
+
+
+ +
+ +
+ + + +

+week_end_date(col, week_end_day='Sat') + +

+ + +
+ +

Return a date column for the end of week for a given day.

+

The Spark function dayofweek considers Sunday as the first day of the week, and +uses the default value of 1 to indicate Sunday. Usage of the when and otherwise +functions allows a comparison between the end of week day indicated and the day +of week computed, and the return of the reference date if they match or the +addition of one week to the reference date otherwise.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The reference date column.

+ required +
week_end_day + str +

The week end day (default: 'Sat')

+ 'Sat' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column of end of the week dates.

+ +
+ Source code in quinn/functions.py +
def week_end_date(col: Column, week_end_day: str = "Sat") -> Column:
+    """Return a date column for the end of week for a given day.
+
+    The Spark function `dayofweek` considers Sunday as the first day of the week, and
+    uses the default value of 1 to indicate Sunday. Usage of the `when` and `otherwise`
+    functions allow a comparison between the end of week day indicated and the day
+    of week computed, and the return of the reference date if they match or the the
+    addition of one week to the reference date otherwise.
+
+    :param col: The reference date column.
+    :type col: Column
+    :param week_end_day: The week end day (default: 'Sat')
+    :type week_end_day: str
+    :return: A Column of end of the week dates.
+    :rtype: Column
+    """
+    _raise_if_invalid_day(week_end_day)
+    # these are the default Spark mappings.  Spark considers Sunday the first day of the week.
+    day_of_week_mapping = {
+        "Sun": 1,
+        "Mon": 2,
+        "Tue": 3,
+        "Wed": 4,
+        "Thu": 5,
+        "Fri": 6,
+        "Sat": 7,
+    }
+    return F.when(
+        F.dayofweek(col).eqNullSafe(F.lit(day_of_week_mapping[week_end_day])),
+        col,
+    ).otherwise(F.next_day(col, week_end_day))
+
+
+ +
+ +
+ + + +

+week_start_date(col, week_start_day='Sun') + +

+ + +
+ +

Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates.

+

The "standard week" in Spark starts on Sunday, however an optional argument can be +used to start the week from a different day, e.g. Monday. The week_start_day +argument is a string corresponding to the day of the week to start the week +from, e.g. "Mon", "Tue", and must be in the set: {"Sun", "Mon", "Tue", "Wed", +"Thu", "Fri", "Sat"}. If the argument given is not a valid day then a ValueError +will be raised.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The column to determine start of week dates on

+ required +
week_start_day + str +

The day to start the week on

+ 'Sun' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column with start of week dates

+ +
+ Source code in quinn/functions.py +
def week_start_date(col: Column, week_start_day: str = "Sun") -> Column:
+    """Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates.
+
+    The "standard week" in Spark starts on Sunday, however an optional argument can be
+    used to start the week from a different day, e.g. Monday. The `week_start_day`
+    argument is a string corresponding to the day of the week to start the week
+    from, e.g. `"Mon"`, `"Tue"`, and must be in the set: `{"Sun", "Mon", "Tue", "Wed",
+    "Thu", "Fri", "Sat"}`. If the argument given is not a valid day then a `ValueError`
+    will be raised.
+
+    :param col: The column to determine start of week dates on
+    :type col: Column
+    :param week_start_day: The day to start the week on
+    :type week_start_day: str
+    :returns: A Column with start of week dates
+    :rtype: Column
+    """
+    _raise_if_invalid_day(week_start_day)
+    # the "standard week" in Spark is from Sunday to Saturday
+    mapping = {
+        "Sun": "Sat",
+        "Mon": "Sun",
+        "Tue": "Mon",
+        "Wed": "Tue",
+        "Thu": "Wed",
+        "Fri": "Thu",
+        "Sat": "Fri",
+    }
+    end = week_end_date(col, mapping[week_start_day])
+    return F.date_add(end, -6)
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/index.html b/reference/quinn/index.html new file mode 100644 index 00000000..e5d5b5df --- /dev/null +++ b/reference/quinn/index.html @@ -0,0 +1,3145 @@ + + + + + + + + Index - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ +

quinn API.

+ + + +
+ + + + + + + + +
+ + + +

+ DataFrameMissingColumnError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when there's a DataFrame column error.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameMissingColumnError(ValueError):
+    """Raise this when there's a DataFrame column error."""
+
+ +
+ +
+ +
+ + + +

+ DataFrameMissingStructFieldError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when a DataFrame is missing a required StructField.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameMissingStructFieldError(ValueError):
+    """Raise this when there's a DataFrame column error."""
+
+ +
+ +
+ +
+ + + +

+ DataFrameProhibitedColumnError + + +

+ + +
+

+ Bases: ValueError

+ + +

Raise this when a DataFrame includes prohibited columns.

+ + +
+ Source code in quinn/dataframe_validator.py +
class DataFrameProhibitedColumnError(ValueError):
+    """Raise this when a DataFrame includes prohibited columns."""
+
+ +
+ +
+ + +
+ + + +

+anti_trim(col) + +

+ + +
+ +

Remove whitespace that lies between word characters inside col (leading and trailing whitespace is left untouched) using the regexp_replace function.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

Column on which to perform the regexp_replace.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A new Column with all whitespace between word characters removed.

+ +
+ Source code in quinn/functions.py +
def anti_trim(col: Column) -> Column:
+    """Remove whitespace from the boundaries of ``col`` using the regexp_replace function.
+
+    :param col: Column on which to perform the regexp_replace.
+    :type col: Column
+    :return: A new Column with all whitespace removed from the boundaries.
+    :rtype: Column
+    """
+    return F.regexp_replace(col, "\\b\\s+\\b", "")
+
+
+ +
+ +
+ + + +

+approx_equal(col1, col2, threshold) + +

+ + +
+ +

Compare two Column objects by checking if the difference between them is less than a specified threshold.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col1 + Column +

the first Column

+ required +
col2 + Column +

the second Column

+ required +
threshold + Number +

value to compare with

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

Boolean Column with True indicating that abs(col1 - col2) is less than threshold

+ +
+ Source code in quinn/functions.py +
def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column:
+    """Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``.
+
+    :param col1: the first ``Column``
+    :type col1: Column
+    :param col2: the second ``Column``
+    :type col2: Column
+    :param threshold: value to compare with
+    :type threshold: Number
+    :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 -
+    col2)`` is less than ``threshold``
+    """
+    return F.abs(col1 - col2) < threshold
+
+
+ +
+ +
+ + + +

+business_days_between(start_date, end_date) + +

+ + +
+ +

Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
start_date + Column +

The column with the start dates

+ required +
end_date + Column +

The column with the end dates

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

a Column with the number of business days between the start and the end date

+ +
+ Source code in quinn/functions.py +
def business_days_between(start_date: Column, end_date: Column) -> Column:  # noqa: ARG001
+    """Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date.
+
+    :param start_date: The column with the start dates
+    :type start_date: Column
+    :param end_date: The column with the end dates
+    :type end_date: Column
+    :returns: a Column with the number of business days between the start and the end date
+    :rtype: Column
+    """
+    all_days = "sequence(start_date, end_date)"
+    days_of_week = f"transform({all_days}, x -> date_format(x, 'E'))"
+    filter_weekends = F.expr(f"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))")
+    num_business_days = F.size(filter_weekends) - 1
+
+    return F.when(num_business_days < 0, None).otherwise(num_business_days)
+
+
+ +
+ +
+ + + +

+column_to_list(df, col_name) + +

+ + +
+ +

Collect column to list of values.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

Input DataFrame

+ required +
col_name + str +

Column to collect

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ List[Any] +

List of values

+ +
+ Source code in quinn/dataframe_helpers.py +
def column_to_list(df: DataFrame, col_name: str) -> list[Any]:
+    """Collect column to list of values.
+
+    :param df: Input DataFrame
+    :type df: pyspark.sql.DataFrame
+    :param col_name: Column to collect
+    :type col_name: str
+    :return: List of values
+    :rtype: List[Any]
+    """
+    return [x[col_name] for x in df.select(col_name).collect()]
+
+
+ +
+ +
+ + + +

+create_df(spark, rows_data, col_specs) + +

+ + +
+ +

Create a new DataFrame from the given data and column specs.

+

The returned DataFrame is created using the StructType and StructField classes provided by PySpark.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
spark + SparkSession +

SparkSession object

+ required +
rows_data + array-like +

the data used to create the DataFrame

+ required +
col_specs + list of tuples +

list of tuples containing the name and type of the field

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

a new DataFrame

+ +
+ Source code in quinn/dataframe_helpers.py +
def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame:  # noqa: ANN001
+    """Create a new DataFrame from the given data and column specs.
+
+    The returned DataFrame s created using the StructType and StructField classes provided by PySpark.
+
+    :param spark: SparkSession object
+    :type spark: SparkSession
+    :param rows_data: the data used to create the DataFrame
+    :type rows_data: array-like
+    :param col_specs: list of tuples containing the name and type of the field
+    :type col_specs: list of tuples
+    :return: a new DataFrame
+    :rtype: DataFrame
+    """
+    struct_fields = list(map(lambda x: StructField(*x), col_specs))  # noqa: C417
+    return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields))
+
+
+ +
+ +
+ + + +

+exists(f) + +

+ + +
+ +

Create a user-defined function.

+

It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating +whether any element in the list is true according to the argument f of the exists() function.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
f + Callable[[Any], bool] +

Callable function - A callable function that takes an element of type Any and returns a boolean value.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function.

+ +
+ Source code in quinn/functions.py +
def exists(f: Callable[[Any], bool]) -> udf:
+    """Create a user-defined function.
+
+    It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating
+    whether any element in the list is true according to the argument ``f`` of the ``exists()`` function.
+
+    :param f: Callable function - A callable function that takes an element of
+    type Any and returns a boolean value.
+    :return: A user-defined function that takes
+    a list expressed as a column of type ArrayType(AnyType) as an argument and
+    returns a boolean value indicating whether any element in the list is true
+    according to the argument ``f`` of the ``exists()`` function.
+    :rtype: UserDefinedFunction
+    """
+
+    def temp_udf(list_: list) -> bool:
+        return any(map(f, list_))
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+forall(f) + +

+ + +
+ +

The forall function allows for mapping a given boolean function to a list of arguments and returns a single boolean value.

+

It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to +each element of the list and returning a single boolean value if all the elements pass through the given boolean function.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
f + Callable[[Any], bool] +

A callable function f which takes in any type and returns a boolean

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise.

+ +
+ Source code in quinn/functions.py +
def forall(f: Callable[[Any], bool]) -> udf:
+    """The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value.
+
+    It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to
+    each element of the list and returning a single boolean value if all the elements pass through the given boolean function.
+
+    :param f: A callable function ``f`` which takes in any type and returns a boolean
+    :return: A spark UDF which accepts a list of arguments and returns True if all
+    elements pass through the given boolean function, False otherwise.
+    :rtype: UserDefinedFunction
+    """
+
+    def temp_udf(list_: list) -> bool:
+        return all(map(f, list_))
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+multi_equals(value) + +

+ + +
+ +

Create a user-defined function that checks if all the given columns have the designated value.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
value + Any +

The designated value.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ UserDefinedFunction +

A user-defined function of type BooleanType().

+ +
+ Source code in quinn/functions.py +
def multi_equals(value: Any) -> udf:  # noqa: ANN401
+    """Create a user-defined function that checks if all the given columns have the designated value.
+
+    :param value: The designated value.
+    :type value: Any
+    :return: A user-defined function of type BooleanType().
+    :rtype: UserDifinedFunction
+    """
+
+    def temp_udf(*cols) -> bool:  # noqa: ANN002
+        return all(map(lambda col: col == value, cols))  # noqa: C417
+
+    return F.udf(temp_udf, BooleanType())
+
+
+ +
+ +
+ + + +

+print_athena_create_table(df, athena_table_name, s3location) + +

+ + +
+ +

Generate the Athena create table statement for a given DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The pyspark.sql.DataFrame to use

+ required +
athena_table_name + str +

The name of the athena table to generate

+ required +
s3location + str +

The S3 location of the parquet data

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ None +

None

+ +
+ Source code in quinn/dataframe_helpers.py +
def print_athena_create_table(
+    df: DataFrame,
+    athena_table_name: str,
+    s3location: str,
+) -> None:
+    """Generate the Athena create table statement for a given DataFrame.
+
+    :param df: The pyspark.sql.DataFrame to use
+    :param athena_table_name: The name of the athena table to generate
+    :param s3location: The S3 location of the parquet data
+    :return: None
+    """
+    fields = df.schema
+
+    print(f"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( ")
+
+    for field in fields.fieldNames()[:-1]:
+        print("\t", f"`{fields[field].name}` {fields[field].dataType.simpleString()}, ")
+    last = fields[fields.fieldNames()[-1]]
+    print("\t", f"`{last.name}` {last.dataType.simpleString()} ")
+
+    print(")")
+    print("STORED AS PARQUET")
+    print(f"LOCATION '{s3location}'\n")
+
+
+ +
+ +
+ + + +

+print_schema_as_code(dtype) + +

+ + +
+ +

Represent DataType (including StructType) as valid Python code.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dtype + T.DataType +

The input DataType or Schema object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ str +

Valid Python code that generates the same schema.

+ +
+ Source code in quinn/schema_helpers.py +
def print_schema_as_code(dtype: T.DataType) -> str:
+    """Represent DataType (including StructType) as valid Python code.
+
+    :param dtype: The input DataType or Schema object
+    :type dtype: pyspark.sql.types.DataType
+    :return: A valid python code which generate the same schema.
+    :rtype: str
+    """
+    res = []
+    if isinstance(dtype, T.StructType):
+        res.append("StructType(\n\tfields=[")
+        for field in dtype.fields:
+            for line in _repr_column(field).split("\n"):
+                res.append("\n\t\t")
+                res.append(line)
+            res.append(",")
+        res.append("\n\t]\n)")
+
+    elif isinstance(dtype, T.ArrayType):
+        res.append("ArrayType(")
+        res.append(print_schema_as_code(dtype.elementType))
+        res.append(")")
+
+    elif isinstance(dtype, T.MapType):
+        res.append("MapType(")
+        res.append(f"\n\t{print_schema_as_code(dtype.keyType)},")
+        for line in print_schema_as_code(dtype.valueType).split("\n"):
+            res.append("\n\t")
+            res.append(line)
+        res.append(",")
+        res.append(f"\n\t{dtype.valueContainsNull},")
+        res.append("\n)")
+
+    elif isinstance(dtype, T.DecimalType):
+        res.append(f"DecimalType({dtype.precision}, {dtype.scale})")
+
+    elif str(dtype).endswith("()"):
+        # PySpark 3.3+
+        res.append(str(dtype))
+    else:
+        res.append(f"{dtype}()")
+
+    return "".join(res)
+
+
+ +
+ +
+ + + +

+regexp_extract_all(s, regexp) + +

+ + +
+ +

Function uses the Python re library to extract regular expressions from a string (s) using a regex pattern (regexp).

+

It returns a list of all matches, or None if s is None.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
s + Column +

input string (Column)

+ required +
regexp + Column +

string re pattern

+ required +
+ +
+ Source code in quinn/functions.py +
@F.udf(returnType=ArrayType(StringType()))
+def regexp_extract_all(s: Column, regexp: Column) -> Column:
+    """Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`).
+
+    It returns a list of all matches, or    `None` if `s` is `None`.
+
+    :param s: input string (`Column`)
+    :type s: str
+    :param regexp: string `re` pattern
+    :rtype: Column
+    """
+    return None if s is None else re.findall(regexp, s)
+
+
+ +
+ +
+ + + +

+remove_all_whitespace(col) + +

+ + +
+ +

Function takes a Column object as a parameter and returns a Column object with all white space removed.

+

It does this using the regexp_replace function from F, which replaces all whitespace with an empty string.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

a Column object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

a Column object with all white space removed

+ +
+ Source code in quinn/functions.py +
def remove_all_whitespace(col: Column) -> Column:
+    """Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed.
+
+    It does this using the regexp_replace function from F, which replaces all whitespace with an empty string.
+
+    :param col: a `Column` object
+    :type col: Column
+    :returns: a `Column` object with all white space removed
+    :rtype: Column
+    """
+    return F.regexp_replace(col, "\\s+", "")
+
+
+ +
+ +
+ + + +

+remove_non_word_characters(col) + +

+ + +
+ +

Removes non-word characters from a column.

+

The non-word characters which will be removed are those identified by the +regular expression "[^\\w\\s]+". This expression represents any character +that is not a word character (e.g. \\w) or whitespace (\\s).

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

A Column object.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column object with non-word characters removed.

+ +
+ Source code in quinn/functions.py +
def remove_non_word_characters(col: Column) -> Column:
    r"""Drop every character that is neither a word character nor whitespace.

    Matching uses the regular expression ``[^\w\s]+``: each run of characters that
    are not word characters (``\w``) and not whitespace (``\s``) is replaced with an
    empty string.

    :param col: A Column object.
    :return: A Column object with non-word characters removed.
    """
    non_word_run = "[^\\w\\s]+"
    return F.regexp_replace(col, non_word_run, "")
+
+
+ +
+ +
+ + + +

+show_output_to_df(show_output, spark) + +

+ + +
+ +

Show output as spark DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
show_output + str +

String representing output of 'show' command in spark

+ required +
spark + SparkSession +

SparkSession object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Dataframe +

DataFrame object containing output of a show command in spark

+ +
+ Source code in quinn/dataframe_helpers.py +
def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame:
    """Rebuild a DataFrame from the text printed by a ``show`` command.

    Only lines that start with ``|`` (after stripping surrounding whitespace) are
    used: the first one supplies the column names and the remaining ones supply the
    data rows. Border lines, blank lines around the table and any footer text are
    ignored, so the string may start or end with a newline. Every cell is kept as a
    stripped string.

    :param show_output: String representing output of 'show' command in spark
    :type show_output: str
    :param spark: SparkSession object
    :type spark: SparkSession
    :return: DataFrame object containing output of a show command in spark
    :rtype: DataFrame
    :raises ValueError: if ``show_output`` contains no ``|``-delimited line
    """

    def _cells(table_row: str) -> list:
        # Drop the outer "|" on both sides, then split on the inner separators.
        return [cell.strip() for cell in table_row[1:-1].split("|")]

    stripped_lines = (line.strip() for line in show_output.splitlines())
    table_rows = [line for line in stripped_lines if line.startswith("|")]
    if not table_rows:
        msg = "show_output does not contain any '|'-delimited rows"
        raise ValueError(msg)

    header_row, *data_rows = table_rows
    pretty_column_names = _cells(header_row)
    pretty_data = [tuple(_cells(row)) for row in data_rows]
    return spark.createDataFrame(pretty_data, pretty_column_names)
+
+
+ +
+ +
+ + + +

+single_space(col) + +

+ + +
+ +

Function takes a column and replaces all the multiple white spaces with a single space.

+

It then trims the column to make all the texts consistent.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The column which needs to be spaced

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A trimmed column with single space

+ +
+ Source code in quinn/functions.py +
def single_space(col: Column) -> Column:
    """Collapse runs of spaces into one space and trim the result.

    Every match of the pattern ``" +"`` (one or more space characters) is replaced
    with a single space, and the resulting column is then passed through ``F.trim``.

    :param col: The column which needs to be spaced
    :type col: Column
    :returns: A trimmed column with single space
    :rtype: Column
    """
    collapsed = F.regexp_replace(col, " +", " ")
    return F.trim(collapsed)
+
+
+ +
+ +
+ + + +

+snake_case_col_names(df) + +

+ + +
+ +

Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case.

+

(e.g. col_name_1). It uses the to_snake_case function in conjunction with +the with_columns_renamed function to achieve this.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A DataFrame instance to process

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

A DataFrame instance with column names converted to snake case

+ +
+ Source code in quinn/transformations.py +
def snake_case_col_names(df: DataFrame) -> DataFrame:
    """Rename every column of a ``DataFrame`` to snake case (e.g. ``col_name_1``).

    The renaming is delegated: ``with_columns_renamed`` is given ``to_snake_case`` as
    its renaming function and the transformation it returns is applied to ``df``.

    :param df: A ``DataFrame`` instance to process
    :type df: ``DataFrame``
    :return: A ``DataFrame`` instance with column names converted to snake case
    :rtype: ``DataFrame``
    """
    rename_to_snake_case = with_columns_renamed(to_snake_case)
    return rename_to_snake_case(df)
+
+
+ +
+ +
+ + + +

+sort_columns(df, sort_order, sort_nested=False) + +

+ + +
+ +

This function sorts the columns of a given DataFrame based on a given sort +order. The sort_order parameter can either be asc or desc, which correspond to +ascending and descending order, respectively. If any other value is provided for +the sort_order parameter, a ValueError will be raised.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A DataFrame

+ required +
sort_order + str +

The order in which to sort the columns in the DataFrame

+ required +
sort_nested + bool +

Whether to sort nested structs or not. Defaults to false.

+ False +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.DataFrame +

A DataFrame with the columns sorted in the chosen order

+ +
+ Source code in quinn/transformations.py +
def sort_columns(  # noqa: C901,PLR0915
    df: DataFrame,
    sort_order: str,
    sort_nested: bool = False,
) -> DataFrame:
    """Sort the columns of a DataFrame by name, optionally sorting nested fields too.

    The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to
    ascending and descending order, respectively. If any other value is provided for
    the ``sort_order`` parameter, a ``ValueError`` will be raised.

    With ``sort_nested=True`` the fields of structs, and of structs held in arrays, are
    sorted as well. Arrays whose elements are not structs have no fields to sort and
    are selected unchanged. The nullability flags of the input schema are copied onto
    the sorted schema before the result is rebuilt with ``createDataFrame``.

    :param df: A DataFrame
    :type df: pyspark.sql.DataFrame
    :param sort_order: The order in which to sort the columns in the DataFrame
    :type sort_order: str
    :param sort_nested: Whether to sort nested structs or not. Defaults to false.
    :type sort_nested: bool
    :return: A DataFrame with the columns sorted in the chosen order
    :rtype: pyspark.sql.DataFrame
    :raises ValueError: if ``sort_order`` is neither ``asc`` nor ``desc``
    """

    def sort_nested_cols(schema, is_reversed, base_field="") -> list[str]:  # noqa: ANN001
        # Recursively build one select expression per field of ``schema``, sorted by name.
        # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark
        # Credits: @pault for logic

        def parse_fields(
            fields_to_sort: list, parent_struct, is_reversed: bool,  # noqa: ANN001
        ) -> list:
            sorted_fields: list = sorted(
                fields_to_sort,
                key=lambda x: x["name"],
                reverse=is_reversed,
            )

            # Children are addressed through their parent, e.g. ``address.zip``.
            new_base_field = parent_struct.name
            if base_field:
                new_base_field = base_field + "." + new_base_field

            results = []
            for field in sorted_fields:
                new_struct = StructType([StructField.fromJson(field)])
                results.extend(
                    sort_nested_cols(new_struct, is_reversed, base_field=new_base_field),
                )
            return results

        select_cols = []
        for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed):
            field_type = parent_struct.dataType
            # Only arrays of structs have inner fields to reorder; any other array
            # falls through to the plain-column branches below.
            if isinstance(field_type, ArrayType) and isinstance(
                field_type.elementType, StructType,
            ):
                element_fields = parent_struct.jsonValue()["type"]["elementType"]["fields"]
                array_elements = parse_fields(element_fields, parent_struct, is_reversed)
                element_names = [i.split(".")[-1] for i in array_elements]
                array_elements_formatted = [f"x.{i} as {i}" for i in element_names]

                # create a string representation of the sorted array
                # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS phone_numbers
                # NOTE(review): the array is referenced by its bare name, without
                # ``base_field`` - confirm this is right for arrays nested in structs.
                result = (
                    f"transform({parent_struct.name}, x -> struct("
                    f"{', '.join(array_elements_formatted)})) AS {parent_struct.name}"
                )

            elif isinstance(field_type, StructType):
                field_list = parent_struct.jsonValue()["type"]["fields"]
                sub_fields = parse_fields(field_list, parent_struct, is_reversed)

                # create a string representation of the sorted struct
                # ex: struct(address.zip.first5, address.zip.last4) AS zip
                result = f"struct({', '.join(sub_fields)}) AS {parent_struct.name}"

            elif base_field:
                result = f"{base_field}.{parent_struct.name}"
            else:
                result = parent_struct.name
            select_cols.append(result)

        return select_cols

    # NOTE(review): both helpers below key ``result_dict`` by the bare field name, so
    # same-named fields at different nesting levels share a single entry.
    def get_original_nullability(field: StructField, result_dict: dict) -> None:
        # Record the nullability of ``field`` and, recursively, of its struct children.
        if hasattr(field, "nullable"):
            result_dict[field.name] = field.nullable
        else:
            result_dict[field.name] = True

        data_type = field.dataType
        if isinstance(data_type, ArrayType):
            result_dict[f"{field.name}_element"] = data_type.containsNull
            data_type = data_type.elementType

        # Only structs carry child fields; anything else ends the recursion.
        if isinstance(data_type, StructType):
            for i in data_type.fields:
                get_original_nullability(i, result_dict)

    def fix_nullability(field: StructField, result_dict: dict) -> None:
        # Write the recorded nullability back onto ``field`` and its struct children.
        field.nullable = result_dict[field.name]

        data_type = field.dataType
        if isinstance(data_type, ArrayType):
            # restore the containsNull property of the ArrayType
            data_type.containsNull = result_dict[f"{field.name}_element"]
            data_type = data_type.elementType

        if isinstance(data_type, StructType):
            for i in data_type.fields:
                fix_nullability(i, result_dict)

    if sort_order not in ["asc", "desc"]:
        msg = f"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'"
        raise ValueError(
            msg,
        )
    reverse_lookup = {
        "asc": False,
        "desc": True,
    }

    is_reversed: bool = reverse_lookup[sort_order]
    top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed))
    if not sort_nested:
        return top_level_sorted_df

    is_nested: bool = any(
        isinstance(i.dataType, (StructType, ArrayType))
        for i in top_level_sorted_df.schema
    )

    if not is_nested:
        return top_level_sorted_df

    fully_sorted_schema = sort_nested_cols(top_level_sorted_df.schema, is_reversed)
    output = df.selectExpr(fully_sorted_schema)
    result_dict = {}
    for field in df.schema:
        get_original_nullability(field, result_dict)

    for field in output.schema:
        fix_nullability(field, result_dict)

    if not hasattr(SparkSession, "getActiveSession"):  # spark 2.4
        spark = SparkSession.builder.getOrCreate()
    else:
        spark = SparkSession.getActiveSession()
        spark = spark if spark is not None else SparkSession.builder.getOrCreate()

    # Rebuild the DataFrame so that the corrected schema is the one attached to it.
    return spark.createDataFrame(output.rdd, output.schema)
+
+
+ +
+ +
+ + + +

+split_col(df, col_name, delimiter, new_col_names, mode='permissive', default=None) + +

+ + +
+ +

Splits the given column based on the delimiter and creates new columns with the split values.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The input DataFrame

+ required +
col_name + str +

The name of the column to split

+ required +
delimiter + str +

The delimiter to split the column on

+ required +
new_col_names + list[str] +

A list of two strings for the new column names

+ required +
mode + str +

The split mode. Can be "strict" or "permissive". Default is "permissive"

+ 'permissive' +
default + Optional[str] +

If the mode is "permissive" then default value will be assigned to column

+ None +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.DataFrame. +

dataframe: The resulting DataFrame with the split columns

+ +
+ Source code in quinn/split_columns.py +
def split_col(  # noqa: PLR0913
    df: DataFrame,
    col_name: str,
    delimiter: str,
    new_col_names: list[str],
    mode: str = "permissive",
    default: Optional[str] = None,
) -> DataFrame:
    """Splits the given column based on the delimiter and creates new columns with the split values.

    One new column is created per entry of ``new_col_names`` and the original column
    is dropped from the result. In ``strict`` mode each non-null value must contain
    exactly ``len(new_col_names) - 1`` delimiters and no blank part. In ``permissive``
    mode missing or empty parts are replaced with ``default``.

    :param df: The input DataFrame
    :type df: pyspark.sql.DataFrame
    :param col_name: The name of the column to split
    :type col_name: str
    :param delimiter: The delimiter to split the column on
    :type delimiter: str
    :param new_col_names: The names of the new columns, one per expected split value
    :type new_col_names: list[str]
    :param mode: The split mode. Can be "strict" or "permissive". Default is "permissive"
    :type mode: str
    :param default: If the mode is "permissive" then default value will be assigned to column
    :type default: Optional[str]
    :return: dataframe: The resulting DataFrame with the split columns
    :rtype: pyspark.sql.DataFrame
    :raises ValueError: if ``col_name`` is not a column of ``df`` or ``mode`` is invalid
    :raises TypeError: if ``delimiter`` is not a string or ``new_col_names`` is not a list
    """
    # Check if the column to be split exists in the DataFrame
    if col_name not in df.columns:
        msg = f"Column '{col_name}' not found in DataFrame."
        raise ValueError(msg)

    # Check if the delimiter is a string
    if not isinstance(delimiter, str):
        msg = "Delimiter must be a string."
        raise TypeError(msg)

    # Check if the new column names are a list of strings
    if not isinstance(new_col_names, list):
        msg = "New column names must be a list of strings."
        raise TypeError(msg)

    # Reject an unknown mode before any Spark expression is built
    if mode not in ("strict", "permissive"):
        msg = f"Invalid mode: {mode}"
        raise ValueError(msg)

    # NOTE(review): Spark's ``split`` treats its pattern as a regular expression, while
    # the strict-mode check below counts ``delimiter`` literally - confirm callers
    # never pass regex metacharacters.
    split_col_expr = split(trim(df[col_name]), delimiter)

    if mode == "strict":

        def _num_delimiter(col_value1: Optional[str]) -> Optional[int]:
            # Null values are not validated
            if col_value1 is None:
                return None

            no_of_delimiter = col_value1.count(delimiter)
            split_value = col_value1.split(delimiter)

            # The number of delimiters must match the number of new columns exactly
            if no_of_delimiter != len(new_col_names) - 1:
                msg = f"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements"
                raise IndexError(
                    msg,
                )

            # None of the split values may be blank
            if any(x.strip() == "" for x in split_value[: len(new_col_names)]):
                msg = "Null or empty values are not accepted for columns in strict mode"
                raise ValueError(
                    msg,
                )

            return int(no_of_delimiter)

        num_udf = udf(_num_delimiter, IntegerType())

        # Create an array of select expressions to create new columns from the split values
        select_exprs = [
            when(split_col_expr.getItem(i) != "", split_col_expr.getItem(i)).alias(name)
            for i, name in enumerate(new_col_names)
        ]

        # Select all the columns from the input DataFrame, along with the new split columns
        df = df.select("*", *select_exprs)  # noqa: PD901

        # Pick a helper column name that cannot clash with an existing column
        validation_col = "del_length"
        while validation_col in df.columns:
            validation_col = f"_{validation_col}"

        df = df.withColumn(validation_col, num_udf(df[col_name]))  # noqa: PD901
        # Presumably cached so the validation UDF stays in the plan even though its
        # column is dropped right after - TODO confirm.
        df.cache()
        # Drop the helper column and the original column
        df = df.select([c for c in df.columns if c not in {validation_col, col_name}])  # noqa: PD901

    else:
        # Permissive mode: use the default value if a split value is missing or empty
        select_exprs = [
            when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i))
            .otherwise(default)
            .alias(name)
            for i, name in enumerate(new_col_names)
        ]

        # Select all the columns from the input DataFrame, along with the new split
        # columns, then drop the original column
        df = df.select("*", *select_exprs).drop(col_name)  # noqa: PD901
        df.cache()

    # Return the DataFrame with the split columns
    return df
+
+
+ +
+ +
+ + + +

+to_list_of_dictionaries(df) + +

+ + +
+ +

Convert a Spark DataFrame to a list of dictionaries.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The Spark DataFrame to convert.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ List[Dict[str, Any]] +

A list of dictionaries representing the rows in the DataFrame.

+ +
+ Source code in quinn/dataframe_helpers.py +
def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]:
    """Collect a Spark DataFrame and return its rows as dictionaries.

    ``df.collect()`` is called and ``asDict()`` is applied to each collected row, in
    order.

    :param df: The Spark DataFrame to convert.
    :type df: :py:class:`pyspark.sql.DataFrame`
    :return: A list of dictionaries representing the rows in the DataFrame.
    :rtype: List[Dict[str, Any]]
    """
    collected_rows = df.collect()
    return [row.asDict() for row in collected_rows]
+
+
+ +
+ +
+ + + +

+to_snake_case(s) + +

+ + +
+ +

Takes a string and converts it to snake case format.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
s + str +

The string to be converted.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ str +

The string in snake case format.

+ +
+ Source code in quinn/transformations.py +
def to_snake_case(s: str) -> str:
    """Lower-case a string and turn each space character into an underscore.

    :param s: The string to be converted.
    :type s: str
    :return: The string in snake case format.
    :rtype: str
    """
    lowered_words = s.lower().split(" ")
    return "_".join(lowered_words)
+
+
+ +
+ +
+ + + +

+two_columns_to_dictionary(df, key_col_name, value_col_name) + +

+ + +
+ +

Collect two columns as dictionary when first column is key and second is value.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

Input DataFrame

+ required +
key_col_name + str +

Key-column

+ required +
value_col_name + str +

Value-column

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Dict[str, Any] +

Dictionary with values

+ +
+ Source code in quinn/dataframe_helpers.py +
def two_columns_to_dictionary(
    df: DataFrame,
    key_col_name: str,
    value_col_name: str,
) -> dict[str, Any]:
    """Collect two columns into a dictionary mapping the first column to the second.

    Only the two named columns are selected before collecting. When a key occurs in
    more than one row, the value from the last collected row is the one kept.

    :param df: Input DataFrame
    :type df: pyspark.sql.DataFrame
    :param key_col_name: Key-column
    :type key_col_name: str
    :param value_col_name: Value-column
    :type value_col_name: str
    :return: Dictionary with values
    :rtype: Dict[str, Any]
    """
    collected_rows = df.select(key_col_name, value_col_name).collect()
    mapping = {}
    for row in collected_rows:
        mapping[row[key_col_name]] = row[value_col_name]
    return mapping
+
+
+ +
+ +
+ + + +

+uuid5(col, namespace=uuid.NAMESPACE_DNS, extra_string='') + +

+ + +
+ +

Function generates UUIDv5 from col and namespace, optionally prepending an extra string to col.

+

Sets variant to RFC 4122 one.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

Column that will be hashed.

+ required +
namespace + uuid.UUID +

Namespace to be used. (default: uuid.NAMESPACE_DNS)

+ uuid.NAMESPACE_DNS +
extra_string + str +

In case of collisions one can pass an extra string to hash on.

+ '' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

String representation of generated UUIDv5

+ +
+ Source code in quinn/functions.py +
def uuid5(
    col: Column,
    namespace: uuid.UUID = uuid.NAMESPACE_DNS,
    extra_string: str = "",
) -> Column:
    """Build a UUIDv5 string column from ``col`` and ``namespace``.

    ``extra_string`` is prepended to ``col``, the result is UTF-8 encoded, the bytes of
    ``namespace`` are put in front of it and the whole is hashed with SHA-1. The
    digest is then cut into the five dash-separated UUID groups, with the version
    digit set to ``5`` and the variant bits set to the RFC 4122 value.

    :param col: Column that will be hashed.
    :type col: Column
    :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`)
    :type namespace: uuid.UUID
    :param extra_string: In case of collisions one can pass an extra string to hash on.
    :type extra_string: str
    :return: String representation of generated UUIDv5
    :rtype: Column
    """
    namespace_bytes = F.lit(namespace.bytes)
    name_bytes = F.encode(F.concat(F.lit(extra_string), col), "utf-8")
    digest = F.sha1(F.concat(namespace_bytes, name_bytes))

    # Fourth group: take 4 hex digits as 16 binary digits and force the two leading
    # bits to "10" (RFC 4122 variant) before converting back to hex.
    fourth_group_bits = F.lpad(F.conv(F.substring(digest, 17, 4), 16, 2), 16, "0")
    rfc4122_bits = F.concat(F.lit("10"), F.substring(fourth_group_bits, 3, 16))
    variant_group = F.lower(F.conv(rfc4122_bits, 2, 16))

    # Third group: the version digit followed by three digits of the digest.
    version_group = F.concat(F.lit("5"), F.substring(digest, 14, 3))

    groups = [
        F.substring(digest, 1, 8),
        F.substring(digest, 9, 4),
        version_group,
        variant_group,
        F.substring(digest, 21, 12),
    ]
    return F.concat_ws("-", *groups)
+
+
+ +
+ +
+ + + +

+validate_absence_of_columns(df, prohibited_col_names) + +

+ + +
+ +

Validate that none of the prohibited column names are present among specified DataFrame columns.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

DataFrame containing columns to be checked.

+ required +
prohibited_col_names + list[str] +

List of prohibited column names.

+ required +
+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameProhibitedColumnError +

If the prohibited column names are present among the specified DataFrame columns.

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None:
    """Check that a DataFrame contains none of the prohibited column names.

    :param df: DataFrame containing columns to be checked.
    :param prohibited_col_names: List of prohibited column names.
    :raises DataFrameProhibitedColumnError: If the prohibited column names are
    present among the specified DataFrame columns.
    """
    all_col_names = df.columns
    extra_col_names = [name for name in all_col_names if name in prohibited_col_names]
    if not extra_col_names:
        return

    error_message = f"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}"
    raise DataFrameProhibitedColumnError(error_message)
+
+
+ +
+ +
+ + + +

+validate_presence_of_columns(df, required_col_names) + +

+ + +
+ +

Validate the presence of column names in a DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A spark DataFrame.

+ required +
required_col_names + list[str] +

List of the required column names for the DataFrame.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ None +

None.

+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameMissingColumnError +

if any of the requested column names are not present in the DataFrame.

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None:
    """Check that every required column name exists in a DataFrame.

    :param df: A spark DataFrame.
    :type df: DataFrame
    :param required_col_names: List of the required column names for the DataFrame.
    :type required_col_names: :py:class:`list` of :py:class:`str`
    :return: None.
    :raises DataFrameMissingColumnError: if any of the requested column names are
    not present in the DataFrame.
    """
    all_col_names = df.columns
    missing_col_names = [
        name for name in required_col_names if name not in all_col_names
    ]
    if not missing_col_names:
        return

    error_message = f"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}"
    raise DataFrameMissingColumnError(error_message)
+
+
+ +
+ +
+ + + +

+validate_schema(df, required_schema, ignore_nullable=False) + +

+ + +
+ +

Function that validates if a given DataFrame has a given StructType as its schema.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

DataFrame to validate

+ required +
required_schema + StructType +

StructType required for the DataFrame

+ required +
ignore_nullable + bool +

(Optional) A flag for if nullable fields should be ignored during validation

+ False +
+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrameMissingStructFieldError +

if any StructFields from the required schema are not included in the DataFrame schema

+ +
+ Source code in quinn/dataframe_validator.py +
def validate_schema(
    df: DataFrame,
    required_schema: StructType,
    ignore_nullable: bool = False,
) -> None:
    """Check that every field of a required schema is present in a DataFrame's schema.

    Both schemas are deep-copied first, so neither ``df.schema`` nor
    ``required_schema`` is modified. With ``ignore_nullable`` set, the ``nullable``
    flag of each field yielded by iterating the two copies is blanked out before the
    fields are compared.

    :param df: DataFrame to validate
    :type df: DataFrame
    :param required_schema: StructType required for the DataFrame
    :type required_schema: StructType
    :param ignore_nullable: (Optional) A flag for if nullable fields should be
    ignored during validation
    :type ignore_nullable: bool, optional

    :raises DataFrameMissingStructFieldError: if any StructFields from the required
    schema are not included in the DataFrame schema
    """
    actual_fields = copy.deepcopy(df.schema)
    wanted_fields = copy.deepcopy(required_schema)

    if ignore_nullable:
        for schema_copy in (actual_fields, wanted_fields):
            for struct_field in schema_copy:
                struct_field.nullable = None

    missing_struct_fields = [
        struct_field
        for struct_field in wanted_fields
        if struct_field not in actual_fields
    ]
    if not missing_struct_fields:
        return

    error_message = f"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {actual_fields}"
    raise DataFrameMissingStructFieldError(error_message)
+
+
+ +
+ +
+ + + +

+week_end_date(col, week_end_day='Sat') + +

+ + +
+ +

Return a date column for the end of week for a given day.

+

The Spark function dayofweek considers Sunday as the first day of the week, and +uses the default value of 1 to indicate Sunday. Usage of the when and otherwise +functions allows a comparison between the end of week day indicated and the day +of week computed, and the return of the reference date if they match or the +addition of one week to the reference date otherwise.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The reference date column.

+ required +
week_end_day + str +

The week end day (default: 'Sat')

+ 'Sat' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column of end of the week dates.

+ +
+ Source code in quinn/functions.py +
def week_end_date(col: Column, week_end_day: str = "Sat") -> Column:
    """Return a date column holding the end of the week for each reference date.

    The Spark function `dayofweek` considers Sunday as the first day of the week and
    numbers it 1. When the day of week of the reference date equals the requested
    week end day, the reference date itself is returned. Otherwise the result of
    `next_day` for the requested week end day is returned.

    :param col: The reference date column.
    :type col: Column
    :param week_end_day: The week end day (default: 'Sat')
    :type week_end_day: str
    :return: A Column of end of the week dates.
    :rtype: Column
    """
    _raise_if_invalid_day(week_end_day)
    # Default Spark numbering: Sunday is day 1 and Saturday is day 7.
    ordered_days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
    spark_day_number = {day: number for number, day in enumerate(ordered_days, start=1)}

    target_day = F.lit(spark_day_number[week_end_day])
    is_week_end_day = F.dayofweek(col).eqNullSafe(target_day)
    return F.when(is_week_end_day, col).otherwise(F.next_day(col, week_end_day))
+
+
+ +
+ +
+ + + +

+week_start_date(col, week_start_day='Sun') + +

+ + +
+ +

Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates.

+

The "standard week" in Spark starts on Sunday, however an optional argument can be +used to start the week from a different day, e.g. Monday. The week_start_day +argument is a string corresponding to the day of the week to start the week +from, e.g. "Mon", "Tue", and must be in the set: {"Sun", "Mon", "Tue", "Wed", +"Thu", "Fri", "Sat"}. If the argument given is not a valid day then a ValueError +will be raised.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col + Column +

The column to determine start of week dates on

+ required +
week_start_day + str +

The day to start the week on

+ 'Sun' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Column +

A Column with start of week dates

+ +
+ Source code in quinn/functions.py +
def week_start_date(col: Column, week_start_day: str = "Sun") -> Column:
+    """Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates.
+
+    The "standard week" in Spark starts on Sunday, however an optional argument can be
+    used to start the week from a different day, e.g. Monday. The `week_start_day`
+    argument is a string corresponding to the day of the week to start the week
+    from, e.g. `"Mon"`, `"Tue"`, and must be in the set: `{"Sun", "Mon", "Tue", "Wed",
+    "Thu", "Fri", "Sat"}`. If the argument given is not a valid day then a `ValueError`
+    will be raised.
+
+    The start date is derived from the matching week end date: the day that precedes
+    `week_start_day` is looked up, `week_end_date` is evaluated for that day, and six
+    days are subtracted from the result.
+
+    :param col: The column to determine start of week dates on
+    :type col: Column
+    :param week_start_day: The day to start the week on
+    :type week_start_day: str
+    :returns: A Column with start of week dates
+    :rtype: Column
+    """
+    _raise_if_invalid_day(week_start_day)
+    # the "standard week" in Spark is from Sunday to Saturday
+    # Each key is a week start day; its value is the day immediately before it,
+    # i.e. the last day of a week that begins on the key.
+    mapping = {
+        "Sun": "Sat",
+        "Mon": "Sun",
+        "Tue": "Mon",
+        "Wed": "Tue",
+        "Thu": "Wed",
+        "Fri": "Thu",
+        "Sat": "Fri",
+    }
+    # Resolve the week end date for the reference date, then step back six days.
+    end = week_end_date(col, mapping[week_start_day])
+    return F.date_add(end, -6)
+
+
+ +
+ +
+ + + +

+with_columns_renamed(fun) + +

+ + +
+ +

Function designed to rename the columns of a Spark DataFrame.

+

It takes a Callable[[str], str] object as an argument (fun) and returns a +Callable[[DataFrame], DataFrame] object.

+

When _() is called on a DataFrame, it creates a list of column names, +applying the argument fun() to each of them, and returning a new DataFrame +with the new column names.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fun + Callable[[str], str] +

Renaming function

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Callable[[DataFrame], DataFrame] +

Function which takes DataFrame as parameter.

+ +
+ Source code in quinn/transformations.py +
def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]:
+    """Function designed to rename the columns of a `Spark DataFrame`.
+
+    It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a
+    `Callable[[DataFrame], DataFrame]` object.
+
+    When `_()` is called on a `DataFrame`, it creates a list of column names,
+    applying the argument `fun()` to each of them, and returning a new `DataFrame`
+    with the new column names.
+
+    :param fun: Renaming function
+    :type fun: `Callable[[str], str]`
+    :returns: Function which takes DataFrame as parameter.
+    :rtype: `Callable[[DataFrame], DataFrame]`
+    """
+
+    def _(df: DataFrame) -> DataFrame:
+        # Each existing name is referenced inside backticks and aliased to fun(name).
+        # NOTE(review): the backticks presumably keep names containing dots or other
+        # special characters from being parsed as nested field access - confirm.
+        cols = [F.col(f"`{col_name}`").alias(fun(col_name)) for col_name in df.columns]
+        return df.select(*cols)
+
+    return _
+
+
+ +
+ +
+ + + +

+with_some_columns_renamed(fun, change_col_name) + +

+ + +
+ +

Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame].

+

Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fun + Callable[[str], str] +

A function that takes a column name as a string and returns a new name as a string.

+ required +
change_col_name + Callable[[str], bool] +

A function that takes a column name as a string and returns a boolean.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ `Callable[[DataFrame], DataFrame]` +

A Callable[[DataFrame], DataFrame], which takes a DataFrame and returns a DataFrame with some of its columns renamed.

+ +
+ Source code in quinn/transformations.py +
def with_some_columns_renamed(
+    fun: Callable[[str], str],
+    change_col_name: Callable[[str], bool],
+) -> Callable[[DataFrame], DataFrame]:
+    """Function that takes a `Callable[[str], str]` and a `Callable[[str], bool]` and returns a `Callable[[DataFrame], DataFrame]`.
+
+    Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed.
+    Columns for which ``change_col_name`` returns a falsy value are selected unchanged.
+
+    :param fun: A function that takes a column name as a string and returns a
+    new name as a string.
+    :type fun: `Callable[[str], str]`
+    :param change_col_name: A function that takes a column name as a string and
+    returns a boolean.
+    :type change_col_name: `Callable[[str], bool]`
+    :return: A `Callable[[DataFrame], DataFrame]`, which takes a
+    `DataFrame` and returns a `DataFrame` with some of its columns renamed.
+    :rtype: `Callable[[DataFrame], DataFrame]`
+    """
+
+    def _(df: DataFrame) -> DataFrame:
+        # One select expression per column, in the DataFrame's existing column order:
+        # aliased to fun(name) when the predicate accepts the name, untouched otherwise.
+        cols = [
+            F.col(f"`{col_name}`").alias(fun(col_name))
+            if change_col_name(col_name)
+            else F.col(f"`{col_name}`")
+            for col_name in df.columns
+        ]
+        return df.select(*cols)
+
+    return _
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/schema_helpers/index.html b/reference/quinn/schema_helpers/index.html new file mode 100644 index 00000000..180b2696 --- /dev/null +++ b/reference/quinn/schema_helpers/index.html @@ -0,0 +1,491 @@ + + + + + + + + Schema helpers - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+complex_fields(schema) + +

+ + +
+ +

Returns a dictionary of complex field names and their data types from the input DataFrame's schema.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
schema + T.StructType +

The schema of the input PySpark DataFrame.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Dict[str, object] +

A dictionary with complex field names as keys and their respective data types as values.

+ +
+ Source code in quinn/schema_helpers.py +
def complex_fields(schema: T.StructType) -> dict[str, object]:
+    """Returns a dictionary of complex field names and their data types from the given schema.
+
+    Only the fields directly on ``schema`` are inspected; a field counts as complex when
+    its data type is an ``ArrayType``, ``StructType`` or ``MapType``.
+
+    :param schema: The schema to inspect (e.g. ``df.schema`` of a PySpark DataFrame).
+    :type schema: pyspark.sql.types.StructType
+    :return: A dictionary with complex field names as keys and their respective data types as values.
+    :rtype: Dict[str, object]
+    """
+    return {
+        field.name: field.dataType
+        for field in schema.fields
+        if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType))
+    }
+
+
+ +
+ +
+ + + +

+print_schema_as_code(dtype) + +

+ + +
+ +

Represent DataType (including StructType) as valid Python code.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dtype + T.DataType +

The input DataType or Schema object

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ str +

Valid Python code which generates the same schema.

+ +
+ Source code in quinn/schema_helpers.py +
def print_schema_as_code(dtype: T.DataType) -> str:
+    """Represent DataType (including StructType) as valid Python code.
+
+    Nested element, key and value types are rendered by calling this function
+    recursively; struct fields are rendered through ``_repr_column``.
+
+    :param dtype: The input DataType or Schema object
+    :type dtype: pyspark.sql.types.DataType
+    :return: Python code, as a string, which generates the same schema.
+    :rtype: str
+    """
+    # Collect string fragments and join them once at the end.
+    res = []
+    if isinstance(dtype, T.StructType):
+        res.append("StructType(\n\tfields=[")
+        for field in dtype.fields:
+            # A field may render to several lines; each one is indented by two tabs.
+            for line in _repr_column(field).split("\n"):
+                res.append("\n\t\t")
+                res.append(line)
+            res.append(",")
+        res.append("\n\t]\n)")
+
+    elif isinstance(dtype, T.ArrayType):
+        # Only the element type is emitted for arrays.
+        res.append("ArrayType(")
+        res.append(print_schema_as_code(dtype.elementType))
+        res.append(")")
+
+    elif isinstance(dtype, T.MapType):
+        # Emitted as key type, value type and valueContainsNull, one per line.
+        res.append("MapType(")
+        res.append(f"\n\t{print_schema_as_code(dtype.keyType)},")
+        for line in print_schema_as_code(dtype.valueType).split("\n"):
+            res.append("\n\t")
+            res.append(line)
+        res.append(",")
+        res.append(f"\n\t{dtype.valueContainsNull},")
+        res.append("\n)")
+
+    elif isinstance(dtype, T.DecimalType):
+        res.append(f"DecimalType({dtype.precision}, {dtype.scale})")
+
+    elif str(dtype).endswith("()"):
+        # PySpark 3.3+: str(dtype) already ends with "()", so it is used as is.
+        res.append(str(dtype))
+    else:
+        # Otherwise the call parentheses are appended to the type's string form.
+        res.append(f"{dtype}()")
+
+    return "".join(res)
+
+
+ +
+ +
+ + + +

+schema_from_csv(spark, file_path) + +

+ + +
+ +

Return a StructType from a CSV file containing schema configuration.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
spark + SparkSession +

The SparkSession object

+ required +
file_path + str +

The path to the CSV file containing the schema configuration

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.types.StructType +

A StructType object representing the schema configuration

+ +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ ValueError +

If the CSV file does not contain the expected columns, in this order: name, type, nullable, metadata

+ +
+ Source code in quinn/schema_helpers.py +
def schema_from_csv(spark: SparkSession, file_path: str) -> T.StructType:  # noqa: C901
+    """Return a StructType from a CSV file containing schema configuration.
+
+    The CSV is read with a header row. Its columns must be a leading subset, in this
+    order, of: name, type, nullable, metadata. Each data row becomes one StructField.
+
+    :param spark: The SparkSession object
+    :type spark: pyspark.sql.session.SparkSession
+
+    :param file_path: The path to the CSV file containing the schema configuration
+    :type file_path: str
+
+    :raises ValueError: If the CSV file does not contain the expected columns (name, type,
+        nullable, metadata) in that order, or if a row holds an unknown type, a nullable
+        value other than true/false, or metadata that is not valid JSON
+
+    :return: A StructType object representing the schema configuration
+    :rtype: pyspark.sql.types.StructType
+    """
+
+    def _validate_json(metadata: str) -> dict:
+        # Parse the metadata cell as JSON; a missing cell yields empty metadata.
+        if metadata is None:
+            return {}
+
+        try:
+            metadata_dict = json.loads(metadata)
+
+        except json.JSONDecodeError as exc:
+            msg = f"Invalid JSON: {metadata}"
+            raise ValueError(msg) from exc
+
+        return metadata_dict
+
+    def _lookup_type(type_str: str) -> T.DataType:
+        # Map a type name from the CSV onto a Spark data type instance.
+        # The lookup is an exact match on the keys below.
+        type_lookup = {
+            "string": T.StringType(),
+            "int": T.IntegerType(),
+            "float": T.FloatType(),
+            "double": T.DoubleType(),
+            "boolean": T.BooleanType(),
+            "bool": T.BooleanType(),
+            "timestamp": T.TimestampType(),
+            "date": T.DateType(),
+            "binary": T.BinaryType(),
+        }
+
+        if type_str not in type_lookup:
+            msg = f"Invalid type: {type_str}. Expecting one of: {type_lookup.keys()}"
+            raise ValueError(msg)
+
+        return type_lookup[type_str]
+
+    def _convert_nullable(null_str: str) -> bool:
+        # A missing nullable cell defaults to True; otherwise the text must be
+        # "true" or "false" (compared case-insensitively).
+        if null_str is None:
+            return True
+
+        parsed_val = null_str.lower()
+        if parsed_val not in ["true", "false"]:
+            msg = f"Invalid nullable value: {null_str}. Expecting True or False."
+            raise ValueError(msg)
+
+        return parsed_val == "true"
+
+    schema_df = spark.read.csv(file_path, header=True)
+    possible_columns = ["name", "type", "nullable", "metadata"]
+    num_cols = len(schema_df.columns)
+    # Trailing columns are optional: compare against the first num_cols possible columns.
+    expected_columns = possible_columns[0:num_cols]
+
+    # ensure that csv contains the expected columns: name, type, nullable, metadata
+    if schema_df.columns != expected_columns:
+        msg = f"CSV must contain columns in this order: {expected_columns}"
+        raise ValueError(msg)
+
+    # create a StructField per row of the CSV
+    fields = []
+    for row in schema_df.collect():
+        field = T.StructField(
+            name=row["name"],
+            dataType=_lookup_type(row["type"]),
+            nullable=_convert_nullable(row["nullable"]) if "nullable" in row else True,
+            metadata=_validate_json(row["metadata"] if "metadata" in row else None),
+        )
+        fields.append(field)
+
+    return T.StructType(fields=fields)
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/spark/index.html b/reference/quinn/spark/index.html new file mode 100644 index 00000000..fc850402 --- /dev/null +++ b/reference/quinn/spark/index.html @@ -0,0 +1,297 @@ + + + + + + + + Spark - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + +
+ + + +

+ SparkProvider + + +

+ + +
+ + +

Class for creating and destroying SparkSession.

+ + +
+ Source code in quinn/spark.py +
class SparkProvider:
+    """Class for creating and destroying SparkSession.
+
+    Constructing an instance builds a session through `set_up_spark` and stores it
+    on `self.spark`; `tear_down_spark` stops a session.
+    """
+
+    def __init__(
+        self: SparkProvider,
+        app_name: str,
+        conf: SparkConf | None = None,
+        extra_dependencies: list[str] | None = None,
+        extra_files: list[str] | None = None,
+    ) -> None:
+        """Initialize SparkSession.
+
+        :param app_name: Application name passed to the session builder.
+        :param conf: Optional Spark configuration; a fresh `SparkConf` is used when omitted.
+        :param extra_dependencies: Optional entries joined into `spark.jars.packages`.
+        :param extra_files: Optional files added to the session via `addPyFile`.
+        """
+        self.spark = self.set_up_spark(
+            app_name,
+            self.master,
+            conf,
+            extra_dependencies,
+            extra_files,
+        )
+
+    @property
+    def master(self: SparkProvider) -> str:  # noqa: D102
+        # Value of the SPARK_MASTER environment variable, or STANDALONE when it is unset.
+        return os.getenv("SPARK_MASTER", STANDALONE)
+
+    @staticmethod
+    def set_up_spark(  # noqa: D102
+        app_name: str,
+        master: str = STANDALONE,
+        conf: SparkConf | None = None,
+        extra_dependencies: list[str] | None = None,
+        extra_files: list[str] | None = None,
+    ) -> SparkSession:
+        # Builds (or reuses, via getOrCreate) a session for the given app name and master.
+        conf = conf if conf else SparkConf()
+
+        if extra_dependencies:
+            # Note: this sets the key on the conf object itself, so a caller-supplied
+            # conf is modified in place.
+            spark_dependencies = ",".join(extra_dependencies)
+            conf.set("spark.jars.packages", spark_dependencies)
+
+        spark = (
+            SparkSession.builder.appName(app_name)
+            .master(master)
+            .config(conf=conf)
+            .getOrCreate()
+        )
+
+        extra_files = extra_files if extra_files else []
+        for extra_file in extra_files:
+            spark.sparkContext.addPyFile(extra_file)
+
+        # Adjust the py4j logger level before handing the session back.
+        quiet_py4j()
+        return spark
+
+    @staticmethod
+    def tear_down_spark(spark: SparkSession) -> None:  # noqa: D102
+        # Stops the given session and clears the driver port system property.
+        spark.stop()
+        # To avoid Akka rebinding to the same port, since it doesn't unbind
+        # immediately on shutdown
+        spark._jvm.System.clearProperty("spark.driver.port")  # noqa: SLF001
+
+ + + +
+ + + + + + + + + +
+ + + +

+__init__(app_name, conf=None, extra_dependencies=None, extra_files=None) + +

+ + +
+ +

Initialize SparkSession.

+ +
+ Source code in quinn/spark.py +
def __init__(
+    self: SparkProvider,
+    app_name: str,
+    conf: SparkConf | None = None,
+    extra_dependencies: list[str] | None = None,
+    extra_files: list[str] | None = None,
+) -> None:
+    """Initialize SparkSession.
+
+    All arguments are forwarded to `set_up_spark`, together with `self.master`,
+    and the resulting session is stored on `self.spark`.
+
+    :param app_name: Application name for the session.
+    :param conf: Optional Spark configuration.
+    :param extra_dependencies: Optional list of extra dependencies.
+    :param extra_files: Optional list of extra files.
+    """
+    self.spark = self.set_up_spark(
+        app_name,
+        self.master,
+        conf,
+        extra_dependencies,
+        extra_files,
+    )
+
+
+ +
+ + + +
+ +
+ +
+ + +
+ + + +

+quiet_py4j() + +

+ + +
+ +

Sets the logging level of py4j.

+ +
+ Source code in quinn/spark.py +
def quiet_py4j() -> None:
+    """Sets the logging level of the "py4j" logger to INFO."""
+    logger = logging.getLogger("py4j")
+    logger.setLevel(logging.INFO)
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/split_columns/index.html b/reference/quinn/split_columns/index.html new file mode 100644 index 00000000..1c640fb1 --- /dev/null +++ b/reference/quinn/split_columns/index.html @@ -0,0 +1,369 @@ + + + + + + + + Split columns - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+split_col(df, col_name, delimiter, new_col_names, mode='permissive', default=None) + +

+ + +
+ +

Splits the given column based on the delimiter and creates new columns with the split values.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The input DataFrame

+ required +
col_name + str +

The name of the column to split

+ required +
delimiter + str +

The delimiter to split the column on

+ required +
new_col_names + list[str] +

A list of two strings for the new column names

+ required +
mode + str +

The split mode. Can be "strict" or "permissive". Default is "permissive"

+ 'permissive' +
default + Optional[str] +

If the mode is "permissive" then default value will be assigned to column

+ None +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.DataFrame. +

dataframe: The resulting DataFrame with the split columns

+ +
+ Source code in quinn/split_columns.py +
def split_col(  # noqa: PLR0913
+    df: DataFrame,
+    col_name: str,
+    delimiter: str,
+    new_col_names: list[str],
+    mode: str = "permissive",
+    default: Optional[str] = None,
+) -> DataFrame:
+    """Splits the given column based on the delimiter and creates new columns with the split values.
+
+    :param df: The input DataFrame
+    :type df: pyspark.sql.DataFrame
+    :param col_name: The name of the column to split
+    :type col_name: str
+    :param delimiter: The delimiter to split the column on
+    :type delimiter: str
+    :param new_col_names: A list of two strings for the new column names
+    :type new_col_names: (List[str])
+    :param mode: The split mode. Can be "strict" or "permissive". Default is "permissive"
+    :type mode: str
+    :param default: If the mode is "permissive" then default value will be assigned to column
+    :type mode: str
+    :return: dataframe: The resulting DataFrame with the split columns
+    :rtype: pyspark.sql.DataFrame.
+    """
+    # Check if the column to be split exists in the DataFrame
+    if col_name not in df.columns:
+        msg = f"Column '{col_name}' not found in DataFrame."
+        raise ValueError(msg)
+
+    # Check if the delimiter is a string
+    if not isinstance(delimiter, str):
+        msg = "Delimiter must be a string."
+        raise TypeError(msg)
+
+    # Check if the new column names are a list of strings
+    if not isinstance(new_col_names, list):
+        msg = "New column names must be a list of strings."
+        raise TypeError(msg)
+
+    # Define a UDF to check the occurrence of delimitter
+    def _num_delimiter(col_value1: str) -> int:
+        # Get the count of delimiter and store the result in no_of_delimiter
+        no_of_delimiter = col_value1.count(delimiter)
+        # Split col_value based on delimiter and store the result in split_value
+        split_value = col_value1.split(delimiter)
+
+        # Check if col_value is not None
+        if col_value1 is not None:
+            # Check if the no of delimiters in split_value is not as expected
+            if no_of_delimiter != len(new_col_names) - 1:
+                # If the length is not same, raise an IndexError with the message mentioning the expected and found length
+                msg = f"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements"
+                raise IndexError(
+                    msg,
+                )
+
+            # If the length of split_value is same as new_col_names, check if any of the split values is None or empty string
+            elif any(  # noqa: RET506
+                x is None or x.strip() == "" for x in split_value[: len(new_col_names)]
+            ):
+                msg = "Null or empty values are not accepted for columns in strict mode"
+                raise ValueError(
+                    msg,
+                )
+
+            # If the above checks pass, return the count of delimiter
+            return int(no_of_delimiter)
+
+        # If col_value is None, return 0
+        return 0
+
+    num_udf = udf(lambda y: None if y is None else _num_delimiter(y), IntegerType())
+
+    # Get the column expression for the column to be split
+    col_expr = df[col_name]
+
+    # Split the column by the delimiter
+    split_col_expr = split(trim(col_expr), delimiter)
+
+    # Check the split mode
+    if mode == "strict":
+        # Create an array of select expressions to create new columns from the split values
+        select_exprs = [
+            when(split_col_expr.getItem(i) != "", split_col_expr.getItem(i)).alias(
+                new_col_names[i],
+            )
+            for i in range(len(new_col_names))
+        ]
+
+        # Select all the columns from the input DataFrame, along with the new split columns
+        df = df.select("*", *select_exprs)  # noqa: PD901
+        df = df.withColumn("del_length", num_udf(df[col_name]))  # noqa: PD901
+        df.cache()
+        # Drop the original column if the new columns were created successfully
+        df = df.select([c for c in df.columns if c not in {"del_length", col_name}])  # noqa: PD901
+
+    elif mode == "permissive":
+        # Create an array of select expressions to create new columns from the split values
+        # Use the default value if a split value is missing or empty
+        select_exprs = select_exprs = [
+            when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i))
+            .otherwise(default)
+            .alias(new_col_names[i])
+            for i in range(len(new_col_names))
+        ]
+
+        # Select all the columns from the input DataFrame, along with the new split columns
+        # Drop the original column if the new columns were created successfully
+        df = df.select("*", *select_exprs).drop(col_name)  # noqa: PD901
+        df.cache()
+
+    else:
+        msg = f"Invalid mode: {mode}"
+        raise ValueError(msg)
+
+    # Return the DataFrame with the split columns
+    return df
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/reference/quinn/transformations/index.html b/reference/quinn/transformations/index.html new file mode 100644 index 00000000..47c39ced --- /dev/null +++ b/reference/quinn/transformations/index.html @@ -0,0 +1,1118 @@ + + + + + + + + Transformations - Quinn + + + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ +
+ + + +
+ + + +
+ + + + + + + + + +
+ + + +

+flatten_dataframe(df, separator=':', replace_char='_', sanitized_columns=False) + +

+ + +
+ +

Flattens the complex columns in the DataFrame.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The input PySpark DataFrame.

+ required +
separator + str +

The separator to use in the resulting flattened column names, defaults to ":".

+ ':' +
replace_char + str +

The character to replace special characters with in column names, defaults to "_".

+ '_' +
sanitized_columns + bool +

Whether to sanitize column names, defaults to False.

+ False +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame .. note:: This function assumes the input DataFrame has a consistent schema across all rows. If you have files with different schemas, process each separately instead. .. example:: Example usage: >>> data = [ ( 1, ("Alice", 25), {"A": 100, "B": 200}, ["apple", "banana"], {"key": {"nested_key": 10}}, {"A#": 1000, "B@": 2000}, ), ( 2, ("Bob", 30), {"A": 150, "B": 250}, ["orange", "grape"], {"key": {"nested_key": 20}}, {"A#": 1500, "B@": 2500}, ), ] >>> df = spark.createDataFrame(data) >>> flattened_df = flatten_dataframe(df) >>> flattened_df.show() >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char="-") >>> flattened_df_with_hyphen.show() +

The DataFrame with all complex data types flattened.

+ +
+ Source code in quinn/transformations.py +
def flatten_dataframe(
+    df: DataFrame,
+    separator: str = ":",
+    replace_char: str = "_",
+    sanitized_columns: bool = False,
+) -> DataFrame:
+    """Flattens the complex columns in the DataFrame.
+
+    Complex columns (as reported by ``complex_fields``) are processed one at a time:
+    structs via ``flatten_struct``, arrays via ``explode_outer`` and maps via
+    ``flatten_map``, until no complex column is left.
+
+    :param df: The input PySpark DataFrame.
+    :type df: DataFrame
+    :param separator: The separator to use in the resulting flattened column names, defaults to ":".
+    :type separator: str, optional
+    :param replace_char: The character to replace special characters with in column names, defaults to "_".
+    :type replace_char: str, optional
+    :param sanitized_columns: Whether to sanitize column names, defaults to False.
+    :type sanitized_columns: bool, optional
+    :return: The DataFrame with all complex data types flattened.
+    :rtype: DataFrame
+
+    .. note:: This function assumes the input DataFrame has a consistent schema across all rows. If you have files with
+        different schemas, process each separately instead.
+
+    .. example:: Example usage:
+
+        >>> data = [
+                (
+                    1,
+                    ("Alice", 25),
+                    {"A": 100, "B": 200},
+                    ["apple", "banana"],
+                    {"key": {"nested_key": 10}},
+                    {"A#": 1000, "B@": 2000},
+                ),
+                (
+                    2,
+                    ("Bob", 30),
+                    {"A": 150, "B": 250},
+                    ["orange", "grape"],
+                    {"key": {"nested_key": 20}},
+                    {"A#": 1500, "B@": 2500},
+                ),
+            ]
+
+        >>> df = spark.createDataFrame(data)
+        >>> flattened_df = flatten_dataframe(df)
+        >>> flattened_df.show()
+        >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char="-")
+        >>> flattened_df_with_hyphen.show()
+    """
+
+    def sanitize_column_name(name: str, rc: str = "_") -> str:
+        """Sanitizes column names by replacing special characters with the specified character.
+
+        Every character other than ASCII letters, digits and underscore is replaced.
+
+        :param name: The original column name.
+        :type name: str
+        :param rc: The character to replace special characters with, defaults to '_'.
+        :type rc: str, optional
+        :return: The sanitized column name.
+        :rtype: str
+        """
+        return re.sub(r"[^a-zA-Z0-9_]", rc, name)
+
+    def explode_array(df: DataFrame, col_name: str) -> DataFrame:
+        """Explodes the specified ArrayType column in the input DataFrame and returns a new DataFrame with the exploded column.
+
+        :param df: The input PySpark DataFrame.
+        :type df: DataFrame
+        :param col_name: The column name of the ArrayType to be exploded.
+        :type col_name: str
+        :return: The DataFrame with the exploded ArrayType column.
+        :rtype: DataFrame
+        """
+        # NOTE(review): the exploded column is aliased to col_name and col_name is then
+        # dropped by name - confirm the drop removes only the original array column and
+        # not the exploded one as well.
+        return df.select(
+            "*", F.explode_outer(F.col(f"`{col_name}`")).alias(col_name),
+        ).drop(
+            col_name,
+        )
+
+    fields = complex_fields(df.schema)
+
+    # Handle one complex column per iteration, then recompute the remaining complex
+    # columns from the updated schema.
+    while len(fields) != 0:
+        col_name = next(iter(fields.keys()))
+
+        if isinstance(fields[col_name], StructType):
+            df = flatten_struct(df, col_name, separator)  # noqa: PD901
+
+        elif isinstance(fields[col_name], ArrayType):
+            df = explode_array(df, col_name)  # noqa: PD901
+
+        elif isinstance(fields[col_name], MapType):
+            df = flatten_map(df, col_name, separator)  # noqa: PD901
+
+        fields = complex_fields(df.schema)
+
+    # Sanitize column names with the specified replace_char
+    if sanitized_columns:
+        # The boolean flag name is rebound here to the list of sanitized names.
+        sanitized_columns = [
+            sanitize_column_name(col_name, replace_char) for col_name in df.columns
+        ]
+        df = df.toDF(*sanitized_columns)  # noqa: PD901
+
+    return df
+
+
+ +
+ +
+ + + +

+flatten_map(df, col_name, separator=':') + +

+ + +
+ +

Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The input PySpark DataFrame.

+ required +
col_name + str +

The column name of the MapType to be flattened.

+ required +
separator + str +

The separator to use in the resulting flattened column names, defaults to ":".

+ ':' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

The DataFrame with the flattened MapType column.

+ +
+ Source code in quinn/transformations.py +
def flatten_map(df: DataFrame, col_name: str, separator: str = ":") -> DataFrame:
+    """Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns.
+
+    One output column is produced per distinct key found in the map column, named
+    ``col_name + separator + key``; the map column itself is not selected.
+
+    :param df: The input PySpark DataFrame.
+    :type df: DataFrame
+    :param col_name: The column name of the MapType to be flattened.
+    :type col_name: str
+    :param separator: The separator to use in the resulting flattened column names, defaults to ":".
+    :type separator: str, optional
+    :return: The DataFrame with the flattened MapType column.
+    :rtype: DataFrame
+    """
+    # Gather the distinct keys used across all rows; they are collected so that the
+    # output columns can be named after them.
+    keys_df = df.select(F.explode_outer(F.map_keys(F.col(f"`{col_name}`")))).distinct()
+    keys = [row[0] for row in keys_df.collect()]
+    # NOTE(review): building the alias by concatenation assumes every key is a string - confirm.
+    key_cols = [
+        F.col(f"`{col_name}`").getItem(k).alias(col_name + separator + k) for k in keys
+    ]
+    # Keep every other column as is and append one column per map key.
+    return df.select(
+        [F.col(f"`{col}`") for col in df.columns if col != col_name] + key_cols,
+    )
+
+
+ +
+ +
+ + + +

+flatten_struct(df, col_name, separator=':') + +

+ + +
+ +

Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

The input PySpark DataFrame.

+ required +
col_name + str +

The column name of the StructType to be flattened.

+ required +
separator + str +

The separator to use in the resulting flattened column names, defaults to ':'.

+ ':' +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ DataFrame +

The DataFrame with the flattened StructType column.

+ +
+ Source code in quinn/transformations.py +
def flatten_struct(df: DataFrame, col_name: str, separator: str = ":") -> DataFrame:
+    """Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns.
+
+    One output column is produced per field of the struct, named
+    ``col_name + separator + field_name``; the struct column itself is dropped.
+
+    :param df: The input PySpark DataFrame.
+    :type df: DataFrame
+    :param col_name: The column name of the StructType to be flattened.
+    :type col_name: str
+    :param separator: The separator to use in the resulting flattened column names, defaults to ':'.
+    :type separator: str, optional
+    :return: The DataFrame with the flattened StructType column.
+    :rtype: DataFrame
+    """
+    # The struct's type is looked up by name in the mapping returned by complex_fields.
+    struct_type = complex_fields(df.schema)[col_name]
+    expanded = [
+        F.col(f"`{col_name}`.`{k}`").alias(col_name + separator + k)
+        for k in [n.name for n in struct_type.fields]
+    ]
+    return df.select("*", *expanded).drop(F.col(f"`{col_name}`"))
+
+
+ +
+ +
+ + + +

+snake_case_col_names(df) + +

+ + +
+ +

Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case.

+

(e.g. col_name_1). It uses the to_snake_case function in conjunction with +the with_columns_renamed function to achieve this.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A DataFrame instance to process

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ ``DataFrame``. +

A DataFrame instance with column names converted to snake case

+ +
+ Source code in quinn/transformations.py +
def snake_case_col_names(df: DataFrame) -> DataFrame:
+    """Function takes a ``DataFrame`` instance and returns a ``DataFrame`` with all column names converted to snake case.
+
+    (e.g. ``col_name_1``). It uses the ``to_snake_case`` function in conjunction with
+    the ``with_columns_renamed`` function to achieve this.
+
+    :param df: A ``DataFrame`` instance to process
+    :type df: ``DataFrame``
+    :return: A ``DataFrame`` instance with column names converted to snake case
+    :rtype: ``DataFrame``
+    """
+    # Build the renaming transformation from ``to_snake_case`` and apply it to ``df``.
+    return with_columns_renamed(to_snake_case)(df)
+
+
+ +
+ +
+ + + +

+sort_columns(df, sort_order, sort_nested=False) + +

+ + +
+ +

This function sorts the columns of a given DataFrame based on a given sort +order. The sort_order parameter can either be asc or desc, which correspond to +ascending and descending order, respectively. If any other value is provided for +the sort_order parameter, a ValueError will be raised.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
df + DataFrame +

A DataFrame

+ required +
sort_order + str +

The order in which to sort the columns in the DataFrame

+ required +
sort_nested + bool +

Whether to sort nested structs or not. Defaults to false.

+ False +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ pyspark.sql.DataFrame +

A DataFrame with the columns sorted in the chosen order

+ +
+ Source code in quinn/transformations.py +
def sort_columns( # noqa: C901,PLR0915
+        df: DataFrame, sort_order: str, sort_nested: bool = False,
+) -> DataFrame:
+    """This function sorts the columns of a given DataFrame based on a given sort
+    order. The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to
+    ascending and descending order, respectively. If any other value is provided for
+    the ``sort_order`` parameter, a ``ValueError`` will be raised.
+
+    When ``sort_nested`` is true and the DataFrame contains struct or array columns,
+    the nested fields are sorted as well and the nullability flags of the original
+    schema are copied onto the result.
+
+    :param df: A DataFrame
+    :type df: pyspark.sql.DataFrame
+    :param sort_order: The order in which to sort the columns in the DataFrame
+    :type sort_order: str
+    :param sort_nested: Whether to sort nested structs or not. Defaults to false.
+    :type sort_nested: bool
+    :return: A DataFrame with the columns sorted in the chosen order
+    :rtype: pyspark.sql.DataFrame
+    :raises ValueError: If ``sort_order`` is neither ``asc`` nor ``desc``.
+    """
+
+    def sort_nested_cols(schema, is_reversed, base_field="") -> list[str]: # noqa: ANN001
+        """Return one SQL expression string per field of ``schema``, in sorted order.
+
+        Struct and array fields are expanded recursively; ``base_field`` is the dotted
+        path of the enclosing struct(s) and is prefixed to leaf field names.
+        """
+        # recursively check nested fields and sort them
+        # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark
+        # Credits: @pault for logic
+
+        def parse_fields(
+            fields_to_sort: list, parent_struct, is_reversed: bool, # noqa: ANN001
+        ) -> list:
+            """Sort the JSON field dicts by ``name`` and recurse into each of them."""
+            sorted_fields: list = sorted(
+                fields_to_sort,
+                key=lambda x: x["name"],
+                reverse=is_reversed,
+            )
+
+            results = []
+            for field in sorted_fields:
+                # Wrap the single field in its own StructType so it can be fed back
+                # into sort_nested_cols, which iterates over a schema.
+                new_struct = StructType([StructField.fromJson(field)])
+                # Extend the dotted path with the parent's name.
+                new_base_field = parent_struct.name
+                if base_field:
+                    new_base_field = base_field + "." + new_base_field
+
+                results.extend(
+                    sort_nested_cols(new_struct, is_reversed, base_field=new_base_field),
+                )
+            return results
+
+        select_cols = []
+        for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed):
+            field_type = parent_struct.dataType
+            if isinstance(field_type, ArrayType):
+                # NOTE(review): the indexing below presumes ``elementType`` serialises to
+                # a dict; confirm the behaviour for arrays of primitive types.
+                array_parent = parent_struct.jsonValue()["type"]["elementType"]
+                base_str = f"transform({parent_struct.name}"
+                suffix_str = f") AS {parent_struct.name}"
+
+                # if struct in array, create mapping to struct
+                if array_parent["type"] == "struct":
+                    array_parent = array_parent["fields"]
+                    base_str = f"{base_str}, x -> struct("
+                    suffix_str = f"){suffix_str}"
+
+                array_elements = parse_fields(array_parent, parent_struct, is_reversed)
+                # Keep only the last dotted component; inside the lambda each element
+                # field is addressed through the variable ``x``.
+                element_names = [i.split(".")[-1] for i in array_elements]
+                array_elements_formatted = [f"x.{i} as {i}" for i in element_names]
+
+                # create a string representation of the sorted array
+                # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS phone_numbers
+                result = f"{base_str}{', '.join(array_elements_formatted)}{suffix_str}"
+
+            elif isinstance(field_type, StructType):
+                field_list = parent_struct.jsonValue()["type"]["fields"]
+                sub_fields = parse_fields(field_list, parent_struct, is_reversed)
+
+                # create a string representation of the sorted struct
+                # ex: struct(address.zip.first5, address.zip.last4) AS zip
+                result = f"struct({', '.join(sub_fields)}) AS {parent_struct.name}"
+
+            elif base_field:
+                # Leaf field inside a struct: refer to it by its full dotted path.
+                result = f"{base_field}.{parent_struct.name}"
+            else:
+                # Top-level leaf field.
+                result = parent_struct.name
+            select_cols.append(result)
+
+        return select_cols
+
+    def get_original_nullability(field: StructField, result_dict: dict) -> None:
+        """Record ``nullable`` (and ``containsNull`` for arrays) of ``field`` and its children in ``result_dict``."""
+        # NOTE(review): entries are keyed by the bare field name, so equally named
+        # fields at different nesting levels share one entry - confirm this is intended.
+        if hasattr(field, "nullable"):
+            result_dict[field.name] = field.nullable
+        else:
+            result_dict[field.name] = True
+
+        if not isinstance(field.dataType, StructType) and not isinstance(
+            field.dataType, ArrayType,
+        ):
+            return
+
+        if isinstance(field.dataType, ArrayType):
+            result_dict[f"{field.name}_element"] = field.dataType.containsNull
+            # NOTE(review): assumes the array's element type exposes ``.fields``
+            # (i.e. is a struct) - confirm for arrays of other element types.
+            children = field.dataType.elementType.fields
+        else:
+            children = field.dataType.fields
+        for i in children:
+            get_original_nullability(i, result_dict)
+
+    def fix_nullability(field: StructField, result_dict: dict) -> None:
+        """Write the values recorded by ``get_original_nullability`` back onto ``field`` and its children."""
+        field.nullable = result_dict[field.name]
+        if not isinstance(field.dataType, StructType) and not isinstance(
+            field.dataType, ArrayType,
+        ):
+            return
+
+        if isinstance(field.dataType, ArrayType):
+            # save the containsNull property of the ArrayType
+            field.dataType.containsNull = result_dict[f"{field.name}_element"]
+            children = field.dataType.elementType.fields
+        else:
+            children = field.dataType.fields
+
+        for i in children:
+            fix_nullability(i, result_dict)
+
+    if sort_order not in ["asc", "desc"]:
+        msg = f"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'"
+        raise ValueError(
+            msg,
+        )
+    reverse_lookup = {
+        "asc": False,
+        "desc": True,
+    }
+
+    is_reversed: bool = reverse_lookup[sort_order]
+    top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed))
+    if not sort_nested:
+        return top_level_sorted_df
+
+    # Nothing further to do when no column is a struct or an array.
+    is_nested: bool = any(
+        isinstance(i.dataType, (StructType, ArrayType))
+            for i in top_level_sorted_df.schema
+    )
+
+    if not is_nested:
+        return top_level_sorted_df
+
+    # Despite its name, this is a list of SQL expression strings for selectExpr.
+    fully_sorted_schema = sort_nested_cols(top_level_sorted_df.schema, is_reversed)
+    output = df.selectExpr(fully_sorted_schema)
+    # Collect the nullability flags of the input schema, then apply them to the
+    # schema of the re-projected DataFrame.
+    result_dict = {}
+    for field in df.schema:
+        get_original_nullability(field, result_dict)
+
+    for field in output.schema:
+        fix_nullability(field, result_dict)
+
+    if not hasattr(SparkSession, "getActiveSession"): # spark 2.4
+        spark = SparkSession.builder.getOrCreate()
+    else:
+        spark = SparkSession.getActiveSession()
+        spark = spark if spark is not None else SparkSession.builder.getOrCreate()
+
+    # Re-create the DataFrame from the underlying RDD together with ``output.schema``
+    # after the nullability fix-up above.
+    return spark.createDataFrame(output.rdd, output.schema)
+
+
+ +
+ +
+ + + +

+to_snake_case(s) + +

+ + +
+ +

Takes a string and converts it to snake case format.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
s + str +

The string to be converted.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ str +

The string in snake case format.

+ +
+ Source code in quinn/transformations.py +
def to_snake_case(s: str) -> str:
+    """Takes a string and converts it to snake case format.
+
+    The conversion lower-cases the string and replaces every space with an
+    underscore. Nothing else is changed (for example, camelCase word boundaries
+    are not split).
+
+    :param s: The string to be converted.
+    :type s: str
+    :return: The string in snake case format.
+    :rtype: str
+    """
+    return s.lower().replace(" ", "_")
+
+
+ +
+ +
+ + + +

+with_columns_renamed(fun) + +

+ + +
+ +

Function designed to rename the columns of a Spark DataFrame.

+

It takes a Callable[[str], str] object as an argument (fun) and returns a +Callable[[DataFrame], DataFrame] object.

+

When _() is called on a DataFrame, it creates a list of column names, +applying the argument fun() to each of them, and returning a new DataFrame +with the new column names.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fun + Callable[[str], str] +

Renaming function

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Callable[[DataFrame], DataFrame] +

Function which takes DataFrame as parameter.

+ +
+ Source code in quinn/transformations.py +
def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]:
+    """Function designed to rename the columns of a `Spark DataFrame`.
+
+    It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a
+    `Callable[[DataFrame], DataFrame]` object.
+
+    When `_()` is called on a `DataFrame`, it creates a list of column names,
+    applying the argument `fun()` to each of them, and returning a new `DataFrame`
+    with the new column names.
+
+    :param fun: Renaming function
+    :type fun: `Callable[[str], str]`
+    :returns: Function which takes DataFrame as parameter.
+    :rtype: `Callable[[DataFrame], DataFrame]`
+    """
+
+    def _(df: DataFrame) -> DataFrame:
+        # Select every column under the name returned by ``fun``; the original
+        # name is wrapped in backticks when the column reference is built.
+        cols = [F.col(f"`{col_name}`").alias(fun(col_name)) for col_name in df.columns]
+        return df.select(*cols)
+
+    return _
+
+
+ +
+ +
+ + + +

+with_some_columns_renamed(fun, change_col_name) + +

+ + +
+ +

Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame].

+

Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed.

+ +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fun + Callable[[str], str] +

A function that takes a column name as a string and returns a new name as a string.

+ required +
change_col_name + Callable[[str], str] +

A function that takes a column name as a string and returns a boolean.

+ required +
+ +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Callable[[DataFrame], DataFrame] +

A Callable[[DataFrame], DataFrame], which takes a DataFrame and returns a DataFrame with some of its columns renamed.

+ +
+ Source code in quinn/transformations.py +
def with_some_columns_renamed(
+    fun: Callable[[str], str],
+    change_col_name: Callable[[str], str],
+) -> Callable[[DataFrame], DataFrame]:
+    """Function that takes a renaming function and a predicate on column names and returns a `Callable[[DataFrame], DataFrame]`.
+
+    Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed.
+
+    :param fun: A function that takes a column name as a string and returns a
+    new name as a string.
+    :type fun: `Callable[[str], str]`
+    :param change_col_name: A function that takes a column name as a string and
+    returns a boolean. Only the truthiness of its result is used, although the
+    parameter is annotated as `Callable[[str], str]`.
+    :type change_col_name: `Callable[[str], bool]`
+    :return: A `Callable[[DataFrame], DataFrame]`, which takes a
+    `DataFrame` and returns a `DataFrame` with some of its columns renamed.
+    :rtype: `Callable[[DataFrame], DataFrame]`
+    """
+
+    def _(df: DataFrame) -> DataFrame:
+        # Columns for which ``change_col_name`` is truthy are aliased to
+        # ``fun(col_name)``; all other columns are selected unchanged.
+        cols = [
+            F.col(f"`{col_name}`").alias(fun(col_name))
+            if change_col_name(col_name)
+            else F.col(f"`{col_name}`")
+            for col_name in df.columns
+        ]
+        return df.select(*cols)
+
+    return _
+
+
+ +
+ + + +
+ +
+ +
+ +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/search.html b/search.html new file mode 100644 index 00000000..7f1615da --- /dev/null +++ b/search.html @@ -0,0 +1,117 @@ + + + + + + + + Quinn + + + + + + + + + + + + +
+ + +
+ +
+
+
    +
  • +
  • +
  • +
+
+
+
+
+ + +

Search Results

+ + + +
+ Searching... +
+ + +
+
+ +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + diff --git a/search/lunr.js b/search/lunr.js new file mode 100644 index 00000000..aca0a167 --- /dev/null +++ b/search/lunr.js @@ -0,0 +1,3475 @@ +/** + * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.9 + * Copyright (C) 2020 Oliver Nightingale + * @license MIT + */ + +;(function(){ + +/** + * A convenience function for configuring and constructing + * a new lunr Index. + * + * A lunr.Builder instance is created and the pipeline setup + * with a trimmer, stop word filter and stemmer. + * + * This builder object is yielded to the configuration function + * that is passed as a parameter, allowing the list of fields + * and other builder parameters to be customised. + * + * All documents _must_ be added within the passed config function. + * + * @example + * var idx = lunr(function () { + * this.field('title') + * this.field('body') + * this.ref('id') + * + * documents.forEach(function (doc) { + * this.add(doc) + * }, this) + * }) + * + * @see {@link lunr.Builder} + * @see {@link lunr.Pipeline} + * @see {@link lunr.trimmer} + * @see {@link lunr.stopWordFilter} + * @see {@link lunr.stemmer} + * @namespace {function} lunr + */ +var lunr = function (config) { + var builder = new lunr.Builder + + builder.pipeline.add( + lunr.trimmer, + lunr.stopWordFilter, + lunr.stemmer + ) + + builder.searchPipeline.add( + lunr.stemmer + ) + + config.call(builder, builder) + return builder.build() +} + +lunr.version = "2.3.9" +/*! + * lunr.utils + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * A namespace containing utils for the rest of the lunr library + * @namespace lunr.utils + */ +lunr.utils = {} + +/** + * Print a warning message to the console. + * + * @param {String} message The message to be printed. 
+ * @memberOf lunr.utils + * @function + */ +lunr.utils.warn = (function (global) { + /* eslint-disable no-console */ + return function (message) { + if (global.console && console.warn) { + console.warn(message) + } + } + /* eslint-enable no-console */ +})(this) + +/** + * Convert an object to a string. + * + * In the case of `null` and `undefined` the function returns + * the empty string, in all other cases the result of calling + * `toString` on the passed object is returned. + * + * @param {Any} obj The object to convert to a string. + * @return {String} string representation of the passed object. + * @memberOf lunr.utils + */ +lunr.utils.asString = function (obj) { + if (obj === void 0 || obj === null) { + return "" + } else { + return obj.toString() + } +} + +/** + * Clones an object. + * + * Will create a copy of an existing object such that any mutations + * on the copy cannot affect the original. + * + * Only shallow objects are supported, passing a nested object to this + * function will cause a TypeError. + * + * Objects with primitives, and arrays of primitives are supported. + * + * @param {Object} obj The object to clone. + * @return {Object} a clone of the passed object. + * @throws {TypeError} when a nested object is passed. 
+ * @memberOf Utils + */ +lunr.utils.clone = function (obj) { + if (obj === null || obj === undefined) { + return obj + } + + var clone = Object.create(null), + keys = Object.keys(obj) + + for (var i = 0; i < keys.length; i++) { + var key = keys[i], + val = obj[key] + + if (Array.isArray(val)) { + clone[key] = val.slice() + continue + } + + if (typeof val === 'string' || + typeof val === 'number' || + typeof val === 'boolean') { + clone[key] = val + continue + } + + throw new TypeError("clone is not deep and does not support nested objects") + } + + return clone +} +lunr.FieldRef = function (docRef, fieldName, stringValue) { + this.docRef = docRef + this.fieldName = fieldName + this._stringValue = stringValue +} + +lunr.FieldRef.joiner = "/" + +lunr.FieldRef.fromString = function (s) { + var n = s.indexOf(lunr.FieldRef.joiner) + + if (n === -1) { + throw "malformed field ref string" + } + + var fieldRef = s.slice(0, n), + docRef = s.slice(n + 1) + + return new lunr.FieldRef (docRef, fieldRef, s) +} + +lunr.FieldRef.prototype.toString = function () { + if (this._stringValue == undefined) { + this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef + } + + return this._stringValue +} +/*! + * lunr.Set + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * A lunr set. + * + * @constructor + */ +lunr.Set = function (elements) { + this.elements = Object.create(null) + + if (elements) { + this.length = elements.length + + for (var i = 0; i < this.length; i++) { + this.elements[elements[i]] = true + } + } else { + this.length = 0 + } +} + +/** + * A complete set that contains all elements. + * + * @static + * @readonly + * @type {lunr.Set} + */ +lunr.Set.complete = { + intersect: function (other) { + return other + }, + + union: function () { + return this + }, + + contains: function () { + return true + } +} + +/** + * An empty set that contains no elements. 
+ * + * @static + * @readonly + * @type {lunr.Set} + */ +lunr.Set.empty = { + intersect: function () { + return this + }, + + union: function (other) { + return other + }, + + contains: function () { + return false + } +} + +/** + * Returns true if this set contains the specified object. + * + * @param {object} object - Object whose presence in this set is to be tested. + * @returns {boolean} - True if this set contains the specified object. + */ +lunr.Set.prototype.contains = function (object) { + return !!this.elements[object] +} + +/** + * Returns a new set containing only the elements that are present in both + * this set and the specified set. + * + * @param {lunr.Set} other - set to intersect with this set. + * @returns {lunr.Set} a new set that is the intersection of this and the specified set. + */ + +lunr.Set.prototype.intersect = function (other) { + var a, b, elements, intersection = [] + + if (other === lunr.Set.complete) { + return this + } + + if (other === lunr.Set.empty) { + return other + } + + if (this.length < other.length) { + a = this + b = other + } else { + a = other + b = this + } + + elements = Object.keys(a.elements) + + for (var i = 0; i < elements.length; i++) { + var element = elements[i] + if (element in b.elements) { + intersection.push(element) + } + } + + return new lunr.Set (intersection) +} + +/** + * Returns a new set combining the elements of this and the specified set. + * + * @param {lunr.Set} other - set to union with this set. + * @return {lunr.Set} a new set that is the union of this and the specified set. + */ + +lunr.Set.prototype.union = function (other) { + if (other === lunr.Set.complete) { + return lunr.Set.complete + } + + if (other === lunr.Set.empty) { + return this + } + + return new lunr.Set(Object.keys(this.elements).concat(Object.keys(other.elements))) +} +/** + * A function to calculate the inverse document frequency for + * a posting. 
This is shared between the builder and the index + * + * @private + * @param {object} posting - The posting for a given term + * @param {number} documentCount - The total number of documents. + */ +lunr.idf = function (posting, documentCount) { + var documentsWithTerm = 0 + + for (var fieldName in posting) { + if (fieldName == '_index') continue // Ignore the term index, its not a field + documentsWithTerm += Object.keys(posting[fieldName]).length + } + + var x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5) + + return Math.log(1 + Math.abs(x)) +} + +/** + * A token wraps a string representation of a token + * as it is passed through the text processing pipeline. + * + * @constructor + * @param {string} [str=''] - The string token being wrapped. + * @param {object} [metadata={}] - Metadata associated with this token. + */ +lunr.Token = function (str, metadata) { + this.str = str || "" + this.metadata = metadata || {} +} + +/** + * Returns the token string that is being wrapped by this object. + * + * @returns {string} + */ +lunr.Token.prototype.toString = function () { + return this.str +} + +/** + * A token update function is used when updating or optionally + * when cloning a token. + * + * @callback lunr.Token~updateFunction + * @param {string} str - The string representation of the token. + * @param {Object} metadata - All metadata associated with this token. + */ + +/** + * Applies the given function to the wrapped string token. + * + * @example + * token.update(function (str, metadata) { + * return str.toUpperCase() + * }) + * + * @param {lunr.Token~updateFunction} fn - A function to apply to the token string. + * @returns {lunr.Token} + */ +lunr.Token.prototype.update = function (fn) { + this.str = fn(this.str, this.metadata) + return this +} + +/** + * Creates a clone of this token. Optionally a function can be + * applied to the cloned token. 
+ * + * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token. + * @returns {lunr.Token} + */ +lunr.Token.prototype.clone = function (fn) { + fn = fn || function (s) { return s } + return new lunr.Token (fn(this.str, this.metadata), this.metadata) +} +/*! + * lunr.tokenizer + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * A function for splitting a string into tokens ready to be inserted into + * the search index. Uses `lunr.tokenizer.separator` to split strings, change + * the value of this property to change how strings are split into tokens. + * + * This tokenizer will convert its parameter to a string by calling `toString` and + * then will split this string on the character in `lunr.tokenizer.separator`. + * Arrays will have their elements converted to strings and wrapped in a lunr.Token. + * + * Optional metadata can be passed to the tokenizer, this metadata will be cloned and + * added as metadata to every token that is created from the object to be tokenized. 
+ * + * @static + * @param {?(string|object|object[])} obj - The object to convert into tokens + * @param {?object} metadata - Optional metadata to associate with every token + * @returns {lunr.Token[]} + * @see {@link lunr.Pipeline} + */ +lunr.tokenizer = function (obj, metadata) { + if (obj == null || obj == undefined) { + return [] + } + + if (Array.isArray(obj)) { + return obj.map(function (t) { + return new lunr.Token( + lunr.utils.asString(t).toLowerCase(), + lunr.utils.clone(metadata) + ) + }) + } + + var str = obj.toString().toLowerCase(), + len = str.length, + tokens = [] + + for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) { + var char = str.charAt(sliceEnd), + sliceLength = sliceEnd - sliceStart + + if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) { + + if (sliceLength > 0) { + var tokenMetadata = lunr.utils.clone(metadata) || {} + tokenMetadata["position"] = [sliceStart, sliceLength] + tokenMetadata["index"] = tokens.length + + tokens.push( + new lunr.Token ( + str.slice(sliceStart, sliceEnd), + tokenMetadata + ) + ) + } + + sliceStart = sliceEnd + 1 + } + + } + + return tokens +} + +/** + * The separator used to split a string into tokens. Override this property to change the behaviour of + * `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens. + * + * @static + * @see lunr.tokenizer + */ +lunr.tokenizer.separator = /[\s\-]+/ +/*! + * lunr.Pipeline + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * lunr.Pipelines maintain an ordered list of functions to be applied to all + * tokens in documents entering the search index and queries being ran against + * the index. + * + * An instance of lunr.Index created with the lunr shortcut will contain a + * pipeline with a stop word filter and an English language stemmer. Extra + * functions can be added before or after either of these functions or these + * default functions can be removed. 
+ * + * When run the pipeline will call each function in turn, passing a token, the + * index of that token in the original list of all tokens and finally a list of + * all the original tokens. + * + * The output of functions in the pipeline will be passed to the next function + * in the pipeline. To exclude a token from entering the index the function + * should return undefined, the rest of the pipeline will not be called with + * this token. + * + * For serialisation of pipelines to work, all functions used in an instance of + * a pipeline should be registered with lunr.Pipeline. Registered functions can + * then be loaded. If trying to load a serialised pipeline that uses functions + * that are not registered an error will be thrown. + * + * If not planning on serialising the pipeline then registering pipeline functions + * is not necessary. + * + * @constructor + */ +lunr.Pipeline = function () { + this._stack = [] +} + +lunr.Pipeline.registeredFunctions = Object.create(null) + +/** + * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token + * string as well as all known metadata. A pipeline function can mutate the token string + * or mutate (or add) metadata for a given token. + * + * A pipeline function can indicate that the passed token should be discarded by returning + * null, undefined or an empty string. This token will not be passed to any downstream pipeline + * functions and will not be added to the index. + * + * Multiple tokens can be returned by returning an array of tokens. Each token will be passed + * to any downstream pipeline functions and all will returned tokens will be added to the index. + * + * Any number of pipeline functions may be chained together using a lunr.Pipeline. + * + * @interface lunr.PipelineFunction + * @param {lunr.Token} token - A token from the document being processed. + * @param {number} i - The index of this token in the complete list of tokens for this document/field. 
+ * @param {lunr.Token[]} tokens - All tokens for this document/field. + * @returns {(?lunr.Token|lunr.Token[])} + */ + +/** + * Register a function with the pipeline. + * + * Functions that are used in the pipeline should be registered if the pipeline + * needs to be serialised, or a serialised pipeline needs to be loaded. + * + * Registering a function does not add it to a pipeline, functions must still be + * added to instances of the pipeline for them to be used when running a pipeline. + * + * @param {lunr.PipelineFunction} fn - The function to check for. + * @param {String} label - The label to register this function with + */ +lunr.Pipeline.registerFunction = function (fn, label) { + if (label in this.registeredFunctions) { + lunr.utils.warn('Overwriting existing registered function: ' + label) + } + + fn.label = label + lunr.Pipeline.registeredFunctions[fn.label] = fn +} + +/** + * Warns if the function is not registered as a Pipeline function. + * + * @param {lunr.PipelineFunction} fn - The function to check for. + * @private + */ +lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) { + var isRegistered = fn.label && (fn.label in this.registeredFunctions) + + if (!isRegistered) { + lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn) + } +} + +/** + * Loads a previously serialised pipeline. + * + * All functions to be loaded must already be registered with lunr.Pipeline. + * If any function from the serialised data has not been registered then an + * error will be thrown. + * + * @param {Object} serialised - The serialised pipeline to load. 
+ * @returns {lunr.Pipeline} + */ +lunr.Pipeline.load = function (serialised) { + var pipeline = new lunr.Pipeline + + serialised.forEach(function (fnName) { + var fn = lunr.Pipeline.registeredFunctions[fnName] + + if (fn) { + pipeline.add(fn) + } else { + throw new Error('Cannot load unregistered function: ' + fnName) + } + }) + + return pipeline +} + +/** + * Adds new functions to the end of the pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline. + */ +lunr.Pipeline.prototype.add = function () { + var fns = Array.prototype.slice.call(arguments) + + fns.forEach(function (fn) { + lunr.Pipeline.warnIfFunctionNotRegistered(fn) + this._stack.push(fn) + }, this) +} + +/** + * Adds a single function after a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline. + * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline. + */ +lunr.Pipeline.prototype.after = function (existingFn, newFn) { + lunr.Pipeline.warnIfFunctionNotRegistered(newFn) + + var pos = this._stack.indexOf(existingFn) + if (pos == -1) { + throw new Error('Cannot find existingFn') + } + + pos = pos + 1 + this._stack.splice(pos, 0, newFn) +} + +/** + * Adds a single function before a function that already exists in the + * pipeline. + * + * Logs a warning if the function has not been registered. + * + * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline. + * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline. 
+ */ +lunr.Pipeline.prototype.before = function (existingFn, newFn) { + lunr.Pipeline.warnIfFunctionNotRegistered(newFn) + + var pos = this._stack.indexOf(existingFn) + if (pos == -1) { + throw new Error('Cannot find existingFn') + } + + this._stack.splice(pos, 0, newFn) +} + +/** + * Removes a function from the pipeline. + * + * @param {lunr.PipelineFunction} fn The function to remove from the pipeline. + */ +lunr.Pipeline.prototype.remove = function (fn) { + var pos = this._stack.indexOf(fn) + if (pos == -1) { + return + } + + this._stack.splice(pos, 1) +} + +/** + * Runs the current list of functions that make up the pipeline against the + * passed tokens. + * + * @param {Array} tokens The tokens to run through the pipeline. + * @returns {Array} + */ +lunr.Pipeline.prototype.run = function (tokens) { + var stackLength = this._stack.length + + for (var i = 0; i < stackLength; i++) { + var fn = this._stack[i] + var memo = [] + + for (var j = 0; j < tokens.length; j++) { + var result = fn(tokens[j], j, tokens) + + if (result === null || result === void 0 || result === '') continue + + if (Array.isArray(result)) { + for (var k = 0; k < result.length; k++) { + memo.push(result[k]) + } + } else { + memo.push(result) + } + } + + tokens = memo + } + + return tokens +} + +/** + * Convenience method for passing a string through a pipeline and getting + * strings out. This method takes care of wrapping the passed string in a + * token and mapping the resulting tokens back to strings. + * + * @param {string} str - The string to pass through the pipeline. + * @param {?object} metadata - Optional metadata to associate with the token + * passed to the pipeline. + * @returns {string[]} + */ +lunr.Pipeline.prototype.runString = function (str, metadata) { + var token = new lunr.Token (str, metadata) + + return this.run([token]).map(function (t) { + return t.toString() + }) +} + +/** + * Resets the pipeline by removing any existing processors. 
+ * + */ +lunr.Pipeline.prototype.reset = function () { + this._stack = [] +} + +/** + * Returns a representation of the pipeline ready for serialisation. + * + * Logs a warning if the function has not been registered. + * + * @returns {Array} + */ +lunr.Pipeline.prototype.toJSON = function () { + return this._stack.map(function (fn) { + lunr.Pipeline.warnIfFunctionNotRegistered(fn) + + return fn.label + }) +} +/*! + * lunr.Vector + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * A vector is used to construct the vector space of documents and queries. These + * vectors support operations to determine the similarity between two documents or + * a document and a query. + * + * Normally no parameters are required for initializing a vector, but in the case of + * loading a previously dumped vector the raw elements can be provided to the constructor. + * + * For performance reasons vectors are implemented with a flat array, where an elements + * index is immediately followed by its value. E.g. [index, value, index, value]. This + * allows the underlying array to be as sparse as possible and still offer decent + * performance when being used for vector calculations. + * + * @constructor + * @param {Number[]} [elements] - The flat list of element index and element value pairs. + */ +lunr.Vector = function (elements) { + this._magnitude = 0 + this.elements = elements || [] +} + + +/** + * Calculates the position within the vector to insert a given index. + * + * This is used internally by insert and upsert. If there are duplicate indexes then + * the position is returned as if the value for that index were to be updated, but it + * is the callers responsibility to check whether there is a duplicate at that index + * + * @param {Number} insertIdx - The index at which the element should be inserted. 
+ * @returns {Number} + */ +lunr.Vector.prototype.positionForIndex = function (index) { + // For an empty vector the tuple can be inserted at the beginning + if (this.elements.length == 0) { + return 0 + } + + var start = 0, + end = this.elements.length / 2, + sliceLength = end - start, + pivotPoint = Math.floor(sliceLength / 2), + pivotIndex = this.elements[pivotPoint * 2] + + while (sliceLength > 1) { + if (pivotIndex < index) { + start = pivotPoint + } + + if (pivotIndex > index) { + end = pivotPoint + } + + if (pivotIndex == index) { + break + } + + sliceLength = end - start + pivotPoint = start + Math.floor(sliceLength / 2) + pivotIndex = this.elements[pivotPoint * 2] + } + + if (pivotIndex == index) { + return pivotPoint * 2 + } + + if (pivotIndex > index) { + return pivotPoint * 2 + } + + if (pivotIndex < index) { + return (pivotPoint + 1) * 2 + } +} + +/** + * Inserts an element at an index within the vector. + * + * Does not allow duplicates, will throw an error if there is already an entry + * for this index. + * + * @param {Number} insertIdx - The index at which the element should be inserted. + * @param {Number} val - The value to be inserted into the vector. + */ +lunr.Vector.prototype.insert = function (insertIdx, val) { + this.upsert(insertIdx, val, function () { + throw "duplicate index" + }) +} + +/** + * Inserts or updates an existing index within the vector. + * + * @param {Number} insertIdx - The index at which the element should be inserted. + * @param {Number} val - The value to be inserted into the vector. 
+ * @param {function} fn - A function that is called for updates, the existing value and the + * requested value are passed as arguments + */ +lunr.Vector.prototype.upsert = function (insertIdx, val, fn) { + this._magnitude = 0 + var position = this.positionForIndex(insertIdx) + + if (this.elements[position] == insertIdx) { + this.elements[position + 1] = fn(this.elements[position + 1], val) + } else { + this.elements.splice(position, 0, insertIdx, val) + } +} + +/** + * Calculates the magnitude of this vector. + * + * @returns {Number} + */ +lunr.Vector.prototype.magnitude = function () { + if (this._magnitude) return this._magnitude + + var sumOfSquares = 0, + elementsLength = this.elements.length + + for (var i = 1; i < elementsLength; i += 2) { + var val = this.elements[i] + sumOfSquares += val * val + } + + return this._magnitude = Math.sqrt(sumOfSquares) +} + +/** + * Calculates the dot product of this vector and another vector. + * + * @param {lunr.Vector} otherVector - The vector to compute the dot product with. + * @returns {Number} + */ +lunr.Vector.prototype.dot = function (otherVector) { + var dotProduct = 0, + a = this.elements, b = otherVector.elements, + aLen = a.length, bLen = b.length, + aVal = 0, bVal = 0, + i = 0, j = 0 + + while (i < aLen && j < bLen) { + aVal = a[i], bVal = b[j] + if (aVal < bVal) { + i += 2 + } else if (aVal > bVal) { + j += 2 + } else if (aVal == bVal) { + dotProduct += a[i + 1] * b[j + 1] + i += 2 + j += 2 + } + } + + return dotProduct +} + +/** + * Calculates the similarity between this vector and another vector. + * + * @param {lunr.Vector} otherVector - The other vector to calculate the + * similarity with. + * @returns {Number} + */ +lunr.Vector.prototype.similarity = function (otherVector) { + return this.dot(otherVector) / this.magnitude() || 0 +} + +/** + * Converts the vector to an array of the elements within the vector. 
+ * + * @returns {Number[]} + */ +lunr.Vector.prototype.toArray = function () { + var output = new Array (this.elements.length / 2) + + for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) { + output[j] = this.elements[i] + } + + return output +} + +/** + * A JSON serializable representation of the vector. + * + * @returns {Number[]} + */ +lunr.Vector.prototype.toJSON = function () { + return this.elements +} +/* eslint-disable */ +/*! + * lunr.stemmer + * Copyright (C) 2020 Oliver Nightingale + * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt + */ + +/** + * lunr.stemmer is an english language stemmer, this is a JavaScript + * implementation of the PorterStemmer taken from http://tartarus.org/~martin + * + * @static + * @implements {lunr.PipelineFunction} + * @param {lunr.Token} token - The string to stem + * @returns {lunr.Token} + * @see {@link lunr.Pipeline} + * @function + */ +lunr.stemmer = (function(){ + var step2list = { + "ational" : "ate", + "tional" : "tion", + "enci" : "ence", + "anci" : "ance", + "izer" : "ize", + "bli" : "ble", + "alli" : "al", + "entli" : "ent", + "eli" : "e", + "ousli" : "ous", + "ization" : "ize", + "ation" : "ate", + "ator" : "ate", + "alism" : "al", + "iveness" : "ive", + "fulness" : "ful", + "ousness" : "ous", + "aliti" : "al", + "iviti" : "ive", + "biliti" : "ble", + "logi" : "log" + }, + + step3list = { + "icate" : "ic", + "ative" : "", + "alize" : "al", + "iciti" : "ic", + "ical" : "ic", + "ful" : "", + "ness" : "" + }, + + c = "[^aeiou]", // consonant + v = "[aeiouy]", // vowel + C = c + "[^aeiouy]*", // consonant sequence + V = v + "[aeiou]*", // vowel sequence + + mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0 + meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1 + mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1 + s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + var re_mgr0 = new RegExp(mgr0); + var re_mgr1 = new RegExp(mgr1); + var re_meq1 = new RegExp(meq1); + var re_s_v = new RegExp(s_v); + + var re_1a = /^(.+?)(ss|i)es$/; + var re2_1a = /^(.+?)([^s])s$/; + var re_1b = /^(.+?)eed$/; + var re2_1b = /^(.+?)(ed|ing)$/; + var re_1b_2 = /.$/; + var re2_1b_2 = /(at|bl|iz)$/; + var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$"); + var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + + var re_1c = /^(.+?[^aeiou])y$/; + var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + + var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + + var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + var re2_4 = /^(.+?)(s|t)(ion)$/; + + var re_5 = /^(.+?)e$/; + var re_5_1 = /ll$/; + var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + + var porterStemmer = function porterStemmer(w) { + var stem, + suffix, + firstch, + re, + re2, + re3, + re4; + + if (w.length < 3) { return w; } + + firstch = w.substr(0,1); + if (firstch == "y") { + w = firstch.toUpperCase() + w.substr(1); + } + + // Step 1a + re = re_1a + re2 = re2_1a; + + if (re.test(w)) { w = w.replace(re,"$1$2"); } + else if (re2.test(w)) { w = w.replace(re2,"$1$2"); } + + // Step 1b + re = re_1b; + re2 = re2_1b; + if (re.test(w)) { + var fp = re.exec(w); + re = re_mgr0; + if (re.test(fp[1])) { + re = re_1b_2; + w = w.replace(re,""); + } + } else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = re_s_v; + if (re2.test(stem)) { + w = stem; + re2 = re2_1b_2; + re3 = re3_1b_2; + re4 = re4_1b_2; + if (re2.test(w)) { w = w + "e"; } + else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); } + else if (re4.test(w)) { w = w + "e"; } + } + } + + // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say) + re = 
re_1c; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + w = stem + "i"; + } + + // Step 2 + re = re_2; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = re_mgr0; + if (re.test(stem)) { + w = stem + step2list[suffix]; + } + } + + // Step 3 + re = re_3; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = re_mgr0; + if (re.test(stem)) { + w = stem + step3list[suffix]; + } + } + + // Step 4 + re = re_4; + re2 = re2_4; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = re_mgr1; + if (re.test(stem)) { + w = stem; + } + } else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = re_mgr1; + if (re2.test(stem)) { + w = stem; + } + } + + // Step 5 + re = re_5; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = re_mgr1; + re2 = re_meq1; + re3 = re3_5; + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) { + w = stem; + } + } + + re = re_5_1; + re2 = re_mgr1; + if (re.test(w) && re2.test(w)) { + re = re_1b_2; + w = w.replace(re,""); + } + + // and turn initial Y back to y + + if (firstch == "y") { + w = firstch.toLowerCase() + w.substr(1); + } + + return w; + }; + + return function (token) { + return token.update(porterStemmer); + } +})(); + +lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer') +/*! + * lunr.stopWordFilter + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * lunr.generateStopWordFilter builds a stopWordFilter function from the provided + * list of stop words. + * + * The built in lunr.stopWordFilter is built using this generator and can be used + * to generate custom stopWordFilters for applications or non English languages. 
+ * + * @function + * @param {Array} token The token to pass through the filter + * @returns {lunr.PipelineFunction} + * @see lunr.Pipeline + * @see lunr.stopWordFilter + */ +lunr.generateStopWordFilter = function (stopWords) { + var words = stopWords.reduce(function (memo, stopWord) { + memo[stopWord] = stopWord + return memo + }, {}) + + return function (token) { + if (token && words[token.toString()] !== token.toString()) return token + } +} + +/** + * lunr.stopWordFilter is an English language stop word list filter, any words + * contained in the list will not be passed through the filter. + * + * This is intended to be used in the Pipeline. If the token does not pass the + * filter then undefined will be returned. + * + * @function + * @implements {lunr.PipelineFunction} + * @params {lunr.Token} token - A token to check for being a stop word. + * @returns {lunr.Token} + * @see {@link lunr.Pipeline} + */ +lunr.stopWordFilter = lunr.generateStopWordFilter([ + 'a', + 'able', + 'about', + 'across', + 'after', + 'all', + 'almost', + 'also', + 'am', + 'among', + 'an', + 'and', + 'any', + 'are', + 'as', + 'at', + 'be', + 'because', + 'been', + 'but', + 'by', + 'can', + 'cannot', + 'could', + 'dear', + 'did', + 'do', + 'does', + 'either', + 'else', + 'ever', + 'every', + 'for', + 'from', + 'get', + 'got', + 'had', + 'has', + 'have', + 'he', + 'her', + 'hers', + 'him', + 'his', + 'how', + 'however', + 'i', + 'if', + 'in', + 'into', + 'is', + 'it', + 'its', + 'just', + 'least', + 'let', + 'like', + 'likely', + 'may', + 'me', + 'might', + 'most', + 'must', + 'my', + 'neither', + 'no', + 'nor', + 'not', + 'of', + 'off', + 'often', + 'on', + 'only', + 'or', + 'other', + 'our', + 'own', + 'rather', + 'said', + 'say', + 'says', + 'she', + 'should', + 'since', + 'so', + 'some', + 'than', + 'that', + 'the', + 'their', + 'them', + 'then', + 'there', + 'these', + 'they', + 'this', + 'tis', + 'to', + 'too', + 'twas', + 'us', + 'wants', + 'was', + 'we', + 'were', + 'what', + 
'when', + 'where', + 'which', + 'while', + 'who', + 'whom', + 'why', + 'will', + 'with', + 'would', + 'yet', + 'you', + 'your' +]) + +lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter') +/*! + * lunr.trimmer + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * lunr.trimmer is a pipeline function for trimming non word + * characters from the beginning and end of tokens before they + * enter the index. + * + * This implementation may not work correctly for non latin + * characters and should either be removed or adapted for use + * with languages with non-latin characters. + * + * @static + * @implements {lunr.PipelineFunction} + * @param {lunr.Token} token The token to pass through the filter + * @returns {lunr.Token} + * @see lunr.Pipeline + */ +lunr.trimmer = function (token) { + return token.update(function (s) { + return s.replace(/^\W+/, '').replace(/\W+$/, '') + }) +} + +lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer') +/*! + * lunr.TokenSet + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * A token set is used to store the unique list of all tokens + * within an index. Token sets are also used to represent an + * incoming query to the index, this query token set and index + * token set are then intersected to find which tokens to look + * up in the inverted index. + * + * A token set can hold multiple tokens, as in the case of the + * index token set, or it can hold a single token as in the + * case of a simple query token set. + * + * Additionally token sets are used to perform wildcard matching. + * Leading, contained and trailing wildcards are supported, and + * from this edit distance matching can also be provided. + * + * Token sets are implemented as a minimal finite state automata, + * where both common prefixes and suffixes are shared between tokens. + * This helps to reduce the space used for storing the token set. 
+ * + * @constructor + */ +lunr.TokenSet = function () { + this.final = false + this.edges = {} + this.id = lunr.TokenSet._nextId + lunr.TokenSet._nextId += 1 +} + +/** + * Keeps track of the next, auto increment, identifier to assign + * to a new tokenSet. + * + * TokenSets require a unique identifier to be correctly minimised. + * + * @private + */ +lunr.TokenSet._nextId = 1 + +/** + * Creates a TokenSet instance from the given sorted array of words. + * + * @param {String[]} arr - A sorted array of strings to create the set from. + * @returns {lunr.TokenSet} + * @throws Will throw an error if the input array is not sorted. + */ +lunr.TokenSet.fromArray = function (arr) { + var builder = new lunr.TokenSet.Builder + + for (var i = 0, len = arr.length; i < len; i++) { + builder.insert(arr[i]) + } + + builder.finish() + return builder.root +} + +/** + * Creates a token set from a query clause. + * + * @private + * @param {Object} clause - A single clause from lunr.Query. + * @param {string} clause.term - The query clause term. + * @param {number} [clause.editDistance] - The optional edit distance for the term. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.fromClause = function (clause) { + if ('editDistance' in clause) { + return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance) + } else { + return lunr.TokenSet.fromString(clause.term) + } +} + +/** + * Creates a token set representing a single string with a specified + * edit distance. + * + * Insertions, deletions, substitutions and transpositions are each + * treated as an edit distance of 1. + * + * Increasing the allowed edit distance will have a dramatic impact + * on the performance of both creating and intersecting these TokenSets. + * It is advised to keep the edit distance less than 3. + * + * @param {string} str - The string to create the token set from. + * @param {number} editDistance - The allowed edit distance to match. 
+ * @returns {lunr.Vector} + */ +lunr.TokenSet.fromFuzzyString = function (str, editDistance) { + var root = new lunr.TokenSet + + var stack = [{ + node: root, + editsRemaining: editDistance, + str: str + }] + + while (stack.length) { + var frame = stack.pop() + + // no edit + if (frame.str.length > 0) { + var char = frame.str.charAt(0), + noEditNode + + if (char in frame.node.edges) { + noEditNode = frame.node.edges[char] + } else { + noEditNode = new lunr.TokenSet + frame.node.edges[char] = noEditNode + } + + if (frame.str.length == 1) { + noEditNode.final = true + } + + stack.push({ + node: noEditNode, + editsRemaining: frame.editsRemaining, + str: frame.str.slice(1) + }) + } + + if (frame.editsRemaining == 0) { + continue + } + + // insertion + if ("*" in frame.node.edges) { + var insertionNode = frame.node.edges["*"] + } else { + var insertionNode = new lunr.TokenSet + frame.node.edges["*"] = insertionNode + } + + if (frame.str.length == 0) { + insertionNode.final = true + } + + stack.push({ + node: insertionNode, + editsRemaining: frame.editsRemaining - 1, + str: frame.str + }) + + // deletion + // can only do a deletion if we have enough edits remaining + // and if there are characters left to delete in the string + if (frame.str.length > 1) { + stack.push({ + node: frame.node, + editsRemaining: frame.editsRemaining - 1, + str: frame.str.slice(1) + }) + } + + // deletion + // just removing the last character from the str + if (frame.str.length == 1) { + frame.node.final = true + } + + // substitution + // can only do a substitution if we have enough edits remaining + // and if there are characters left to substitute + if (frame.str.length >= 1) { + if ("*" in frame.node.edges) { + var substitutionNode = frame.node.edges["*"] + } else { + var substitutionNode = new lunr.TokenSet + frame.node.edges["*"] = substitutionNode + } + + if (frame.str.length == 1) { + substitutionNode.final = true + } + + stack.push({ + node: substitutionNode, + editsRemaining: 
frame.editsRemaining - 1, + str: frame.str.slice(1) + }) + } + + // transposition + // can only do a transposition if there are edits remaining + // and there are enough characters to transpose + if (frame.str.length > 1) { + var charA = frame.str.charAt(0), + charB = frame.str.charAt(1), + transposeNode + + if (charB in frame.node.edges) { + transposeNode = frame.node.edges[charB] + } else { + transposeNode = new lunr.TokenSet + frame.node.edges[charB] = transposeNode + } + + if (frame.str.length == 1) { + transposeNode.final = true + } + + stack.push({ + node: transposeNode, + editsRemaining: frame.editsRemaining - 1, + str: charA + frame.str.slice(2) + }) + } + } + + return root +} + +/** + * Creates a TokenSet from a string. + * + * The string may contain one or more wildcard characters (*) + * that will allow wildcard matching when intersecting with + * another TokenSet. + * + * @param {string} str - The string to create a TokenSet from. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.fromString = function (str) { + var node = new lunr.TokenSet, + root = node + + /* + * Iterates through all characters within the passed string + * appending a node for each character. + * + * When a wildcard character is found then a self + * referencing edge is introduced to continually match + * any number of any characters. + */ + for (var i = 0, len = str.length; i < len; i++) { + var char = str[i], + final = (i == len - 1) + + if (char == "*") { + node.edges[char] = node + node.final = final + + } else { + var next = new lunr.TokenSet + next.final = final + + node.edges[char] = next + node = next + } + } + + return root +} + +/** + * Converts this TokenSet into an array of strings + * contained within the TokenSet. + * + * This is not intended to be used on a TokenSet that + * contains wildcards, in these cases the results are + * undefined and are likely to cause an infinite loop. 
+ * + * @returns {string[]} + */ +lunr.TokenSet.prototype.toArray = function () { + var words = [] + + var stack = [{ + prefix: "", + node: this + }] + + while (stack.length) { + var frame = stack.pop(), + edges = Object.keys(frame.node.edges), + len = edges.length + + if (frame.node.final) { + /* In Safari, at this point the prefix is sometimes corrupted, see: + * https://github.com/olivernn/lunr.js/issues/279 Calling any + * String.prototype method forces Safari to "cast" this string to what + * it's supposed to be, fixing the bug. */ + frame.prefix.charAt(0) + words.push(frame.prefix) + } + + for (var i = 0; i < len; i++) { + var edge = edges[i] + + stack.push({ + prefix: frame.prefix.concat(edge), + node: frame.node.edges[edge] + }) + } + } + + return words +} + +/** + * Generates a string representation of a TokenSet. + * + * This is intended to allow TokenSets to be used as keys + * in objects, largely to aid the construction and minimisation + * of a TokenSet. As such it is not designed to be a human + * friendly representation of the TokenSet. + * + * @returns {string} + */ +lunr.TokenSet.prototype.toString = function () { + // NOTE: Using Object.keys here as this.edges is very likely + // to enter 'hash-mode' with many keys being added + // + // avoiding a for-in loop here as it leads to the function + // being de-optimised (at least in V8). From some simple + // benchmarks the performance is comparable, but allowing + // V8 to optimize may mean easy performance wins in the future. + + if (this._str) { + return this._str + } + + var str = this.final ? '1' : '0', + labels = Object.keys(this.edges).sort(), + len = labels.length + + for (var i = 0; i < len; i++) { + var label = labels[i], + node = this.edges[label] + + str = str + label + node.id + } + + return str +} + +/** + * Returns a new TokenSet that is the intersection of + * this TokenSet and the passed TokenSet. 
+ * + * This intersection will take into account any wildcards + * contained within the TokenSet. + * + * @param {lunr.TokenSet} b - An other TokenSet to intersect with. + * @returns {lunr.TokenSet} + */ +lunr.TokenSet.prototype.intersect = function (b) { + var output = new lunr.TokenSet, + frame = undefined + + var stack = [{ + qNode: b, + output: output, + node: this + }] + + while (stack.length) { + frame = stack.pop() + + // NOTE: As with the #toString method, we are using + // Object.keys and a for loop instead of a for-in loop + // as both of these objects enter 'hash' mode, causing + // the function to be de-optimised in V8 + var qEdges = Object.keys(frame.qNode.edges), + qLen = qEdges.length, + nEdges = Object.keys(frame.node.edges), + nLen = nEdges.length + + for (var q = 0; q < qLen; q++) { + var qEdge = qEdges[q] + + for (var n = 0; n < nLen; n++) { + var nEdge = nEdges[n] + + if (nEdge == qEdge || qEdge == '*') { + var node = frame.node.edges[nEdge], + qNode = frame.qNode.edges[qEdge], + final = node.final && qNode.final, + next = undefined + + if (nEdge in frame.output.edges) { + // an edge already exists for this character + // no need to create a new node, just set the finality + // bit unless this node is already final + next = frame.output.edges[nEdge] + next.final = next.final || final + + } else { + // no edge exists yet, must create one + // set the finality bit and insert it + // into the output + next = new lunr.TokenSet + next.final = final + frame.output.edges[nEdge] = next + } + + stack.push({ + qNode: qNode, + output: next, + node: node + }) + } + } + } + } + + return output +} +lunr.TokenSet.Builder = function () { + this.previousWord = "" + this.root = new lunr.TokenSet + this.uncheckedNodes = [] + this.minimizedNodes = {} +} + +lunr.TokenSet.Builder.prototype.insert = function (word) { + var node, + commonPrefix = 0 + + if (word < this.previousWord) { + throw new Error ("Out of order word insertion") + } + + for (var i = 0; i < 
word.length && i < this.previousWord.length; i++) { + if (word[i] != this.previousWord[i]) break + commonPrefix++ + } + + this.minimize(commonPrefix) + + if (this.uncheckedNodes.length == 0) { + node = this.root + } else { + node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child + } + + for (var i = commonPrefix; i < word.length; i++) { + var nextNode = new lunr.TokenSet, + char = word[i] + + node.edges[char] = nextNode + + this.uncheckedNodes.push({ + parent: node, + char: char, + child: nextNode + }) + + node = nextNode + } + + node.final = true + this.previousWord = word +} + +lunr.TokenSet.Builder.prototype.finish = function () { + this.minimize(0) +} + +lunr.TokenSet.Builder.prototype.minimize = function (downTo) { + for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) { + var node = this.uncheckedNodes[i], + childKey = node.child.toString() + + if (childKey in this.minimizedNodes) { + node.parent.edges[node.char] = this.minimizedNodes[childKey] + } else { + // Cache the key for this node since + // we know it can't change anymore + node.child._str = childKey + + this.minimizedNodes[childKey] = node.child + } + + this.uncheckedNodes.pop() + } +} +/*! + * lunr.Index + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * An index contains the built index of all documents and provides a query interface + * to the index. + * + * Usually instances of lunr.Index will not be created using this constructor, instead + * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be + * used to load previously built and serialized indexes. + * + * @constructor + * @param {Object} attrs - The attributes of the built search index. + * @param {Object} attrs.invertedIndex - An index of term/field to document reference. + * @param {Object} attrs.fieldVectors - Field vectors + * @param {lunr.TokenSet} attrs.tokenSet - An set of all corpus tokens. + * @param {string[]} attrs.fields - The names of indexed document fields. 
+ * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms. + */ +lunr.Index = function (attrs) { + this.invertedIndex = attrs.invertedIndex + this.fieldVectors = attrs.fieldVectors + this.tokenSet = attrs.tokenSet + this.fields = attrs.fields + this.pipeline = attrs.pipeline +} + +/** + * A result contains details of a document matching a search query. + * @typedef {Object} lunr.Index~Result + * @property {string} ref - The reference of the document this result represents. + * @property {number} score - A number between 0 and 1 representing how similar this document is to the query. + * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match. + */ + +/** + * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple + * query language which itself is parsed into an instance of lunr.Query. + * + * For programmatically building queries it is advised to directly use lunr.Query, the query language + * is best used for human entered text rather than program generated text. + * + * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported + * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello' + * or 'world', though those that contain both will rank higher in the results. + * + * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can + * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding + * wildcards will increase the number of documents that will be found but can also have a negative + * impact on query performance, especially with wildcards at the beginning of a term. + * + * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term + * hello in the title field will match this query. 
Using a field not present in the index will lead + * to an error being thrown. + * + * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term + * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported + * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2. + * Avoid large values for edit distance to improve query performance. + * + * Each term also supports a presence modifier. By default a term's presence in document is optional, however + * this can be changed to either required or prohibited. For a term's presence to be required in a document the + * term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and + * optionally contain 'bar'. Conversely a leading '-' sets the term's presence to prohibited, i.e. it must not + * appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'. + * + * To escape special characters the backslash character '\' can be used, this allows searches to include + * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead + * of attempting to apply an edit distance of 2 to the search term "foo". + * + * @typedef {string} lunr.Index~QueryString + * @example Simple single term query + * hello + * @example Multiple term query + * hello world + * @example term scoped to a field + * title:hello + * @example term with a boost of 10 + * hello^10 + * @example term with an edit distance of 2 + * hello~2 + * @example terms with presence modifiers + * -foo +bar baz + */ + +/** + * Performs a search against the index using lunr query syntax. + * + * Results will be returned sorted by their score, the most relevant results + * will be returned first.
For details on how the score is calculated, please see + * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}. + * + * For more programmatic querying use lunr.Index#query. + * + * @param {lunr.Index~QueryString} queryString - A string containing a lunr query. + * @throws {lunr.QueryParseError} If the passed query string cannot be parsed. + * @returns {lunr.Index~Result[]} + */ +lunr.Index.prototype.search = function (queryString) { + return this.query(function (query) { + var parser = new lunr.QueryParser(queryString, query) + parser.parse() + }) +} + +/** + * A query builder callback provides a query object to be used to express + * the query to perform on the index. + * + * @callback lunr.Index~queryBuilder + * @param {lunr.Query} query - The query object to build up. + * @this lunr.Query + */ + +/** + * Performs a query against the index using the yielded lunr.Query object. + * + * If performing programmatic queries against the index, this method is preferred + * over lunr.Index#search so as to avoid the additional query parsing overhead. + * + * A query object is yielded to the supplied function which should be used to + * express the query to be run against the index. + * + * Note that although this function takes a callback parameter it is _not_ an + * asynchronous operation, the callback is just yielded a query object to be + * customized. + * + * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query. 
+ * @returns {lunr.Index~Result[]} + */ +lunr.Index.prototype.query = function (fn) { + // for each query clause + // * process terms + // * expand terms from token set + // * find matching documents and metadata + // * get document vectors + // * score documents + + var query = new lunr.Query(this.fields), + matchingFields = Object.create(null), + queryVectors = Object.create(null), + termFieldCache = Object.create(null), + requiredMatches = Object.create(null), + prohibitedMatches = Object.create(null) + + /* + * To support field level boosts a query vector is created per + * field. An empty vector is eagerly created to support negated + * queries. + */ + for (var i = 0; i < this.fields.length; i++) { + queryVectors[this.fields[i]] = new lunr.Vector + } + + fn.call(query, query) + + for (var i = 0; i < query.clauses.length; i++) { + /* + * Unless the pipeline has been disabled for this term, which is + * the case for terms with wildcards, we need to pass the clause + * term through the search pipeline. A pipeline returns an array + * of processed terms. Pipeline functions may expand the passed + * term, which means we may end up performing multiple index lookups + * for a single query term. + */ + var clause = query.clauses[i], + terms = null, + clauseMatches = lunr.Set.empty + + if (clause.usePipeline) { + terms = this.pipeline.runString(clause.term, { + fields: clause.fields + }) + } else { + terms = [clause.term] + } + + for (var m = 0; m < terms.length; m++) { + var term = terms[m] + + /* + * Each term returned from the pipeline needs to use the same query + * clause object, e.g. the same boost and or edit distance. The + * simplest way to do this is to re-use the clause object but mutate + * its term property. 
+ */ + clause.term = term + + /* + * From the term in the clause we create a token set which will then + * be used to intersect the indexes token set to get a list of terms + * to lookup in the inverted index + */ + var termTokenSet = lunr.TokenSet.fromClause(clause), + expandedTerms = this.tokenSet.intersect(termTokenSet).toArray() + + /* + * If a term marked as required does not exist in the tokenSet it is + * impossible for the search to return any matches. We set all the field + * scoped required matches set to empty and stop examining any further + * clauses. + */ + if (expandedTerms.length === 0 && clause.presence === lunr.Query.presence.REQUIRED) { + for (var k = 0; k < clause.fields.length; k++) { + var field = clause.fields[k] + requiredMatches[field] = lunr.Set.empty + } + + break + } + + for (var j = 0; j < expandedTerms.length; j++) { + /* + * For each term get the posting and termIndex, this is required for + * building the query vector. + */ + var expandedTerm = expandedTerms[j], + posting = this.invertedIndex[expandedTerm], + termIndex = posting._index + + for (var k = 0; k < clause.fields.length; k++) { + /* + * For each field that this query term is scoped by (by default + * all fields are in scope) we need to get all the document refs + * that have this term in that field. + * + * The posting is the entry in the invertedIndex for the matching + * term from above. + */ + var field = clause.fields[k], + fieldPosting = posting[field], + matchingDocumentRefs = Object.keys(fieldPosting), + termField = expandedTerm + "/" + field, + matchingDocumentsSet = new lunr.Set(matchingDocumentRefs) + + /* + * if the presence of this term is required ensure that the matching + * documents are added to the set of required matches for this clause. 
+ * + */ + if (clause.presence == lunr.Query.presence.REQUIRED) { + clauseMatches = clauseMatches.union(matchingDocumentsSet) + + if (requiredMatches[field] === undefined) { + requiredMatches[field] = lunr.Set.complete + } + } + + /* + * if the presence of this term is prohibited ensure that the matching + * documents are added to the set of prohibited matches for this field, + * creating that set if it does not yet exist. + */ + if (clause.presence == lunr.Query.presence.PROHIBITED) { + if (prohibitedMatches[field] === undefined) { + prohibitedMatches[field] = lunr.Set.empty + } + + prohibitedMatches[field] = prohibitedMatches[field].union(matchingDocumentsSet) + + /* + * Prohibited matches should not be part of the query vector used for + * similarity scoring and no metadata should be extracted so we continue + * to the next field + */ + continue + } + + /* + * The query field vector is populated using the termIndex found for + * the term and a unit value with the appropriate boost applied. + * Using upsert because there could already be an entry in the vector + * for the term we are working with. In that case we just add the scores + * together. 
+ */ + queryVectors[field].upsert(termIndex, clause.boost, function (a, b) { return a + b }) + + /** + * If we've already seen this term, field combo then we've already collected + * the matching documents and metadata, no need to go through all that again + */ + if (termFieldCache[termField]) { + continue + } + + for (var l = 0; l < matchingDocumentRefs.length; l++) { + /* + * All metadata for this term/field/document triple + * are then extracted and collected into an instance + * of lunr.MatchData ready to be returned in the query + * results + */ + var matchingDocumentRef = matchingDocumentRefs[l], + matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field), + metadata = fieldPosting[matchingDocumentRef], + fieldMatch + + if ((fieldMatch = matchingFields[matchingFieldRef]) === undefined) { + matchingFields[matchingFieldRef] = new lunr.MatchData (expandedTerm, field, metadata) + } else { + fieldMatch.add(expandedTerm, field, metadata) + } + + } + + termFieldCache[termField] = true + } + } + } + + /** + * If the presence was required we need to update the requiredMatches field sets. + * We do this after all fields for the term have collected their matches because + * the clause terms presence is required in _any_ of the fields not _all_ of the + * fields. 
+ */ + if (clause.presence === lunr.Query.presence.REQUIRED) { + for (var k = 0; k < clause.fields.length; k++) { + var field = clause.fields[k] + requiredMatches[field] = requiredMatches[field].intersect(clauseMatches) + } + } + } + + /** + * Need to combine the field scoped required and prohibited + * matching documents into a global set of required and prohibited + * matches + */ + var allRequiredMatches = lunr.Set.complete, + allProhibitedMatches = lunr.Set.empty + + for (var i = 0; i < this.fields.length; i++) { + var field = this.fields[i] + + if (requiredMatches[field]) { + allRequiredMatches = allRequiredMatches.intersect(requiredMatches[field]) + } + + if (prohibitedMatches[field]) { + allProhibitedMatches = allProhibitedMatches.union(prohibitedMatches[field]) + } + } + + var matchingFieldRefs = Object.keys(matchingFields), + results = [], + matches = Object.create(null) + + /* + * If the query is negated (contains only prohibited terms) + * we need to get _all_ fieldRefs currently existing in the + * index. This is only done when we know that the query is + * entirely prohibited terms to avoid any cost of getting all + * fieldRefs unnecessarily. + * + * Additionally, blank MatchData must be created to correctly + * populate the results. + */ + if (query.isNegated()) { + matchingFieldRefs = Object.keys(this.fieldVectors) + + for (var i = 0; i < matchingFieldRefs.length; i++) { + var matchingFieldRef = matchingFieldRefs[i] + var fieldRef = lunr.FieldRef.fromString(matchingFieldRef) + matchingFields[matchingFieldRef] = new lunr.MatchData + } + } + + for (var i = 0; i < matchingFieldRefs.length; i++) { + /* + * Currently we have document fields that match the query, but we + * need to return documents. The matchData and scores are combined + * from multiple fields belonging to the same document. + * + * Scores are calculated by field, using the query vectors created + * above, and combined into a final document score using addition. 
+ */ + var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]), + docRef = fieldRef.docRef + + if (!allRequiredMatches.contains(docRef)) { + continue + } + + if (allProhibitedMatches.contains(docRef)) { + continue + } + + var fieldVector = this.fieldVectors[fieldRef], + score = queryVectors[fieldRef.fieldName].similarity(fieldVector), + docMatch + + if ((docMatch = matches[docRef]) !== undefined) { + docMatch.score += score + docMatch.matchData.combine(matchingFields[fieldRef]) + } else { + var match = { + ref: docRef, + score: score, + matchData: matchingFields[fieldRef] + } + matches[docRef] = match + results.push(match) + } + } + + /* + * Sort the results objects by score, highest first. + */ + return results.sort(function (a, b) { + return b.score - a.score + }) +} + +/** + * Prepares the index for JSON serialization. + * + * The schema for this JSON blob will be described in a + * separate JSON schema file. + * + * @returns {Object} + */ +lunr.Index.prototype.toJSON = function () { + var invertedIndex = Object.keys(this.invertedIndex) + .sort() + .map(function (term) { + return [term, this.invertedIndex[term]] + }, this) + + var fieldVectors = Object.keys(this.fieldVectors) + .map(function (ref) { + return [ref, this.fieldVectors[ref].toJSON()] + }, this) + + return { + version: lunr.version, + fields: this.fields, + fieldVectors: fieldVectors, + invertedIndex: invertedIndex, + pipeline: this.pipeline.toJSON() + } +} + +/** + * Loads a previously serialized lunr.Index + * + * @param {Object} serializedIndex - A previously serialized lunr.Index + * @returns {lunr.Index} + */ +lunr.Index.load = function (serializedIndex) { + var attrs = {}, + fieldVectors = {}, + serializedVectors = serializedIndex.fieldVectors, + invertedIndex = Object.create(null), + serializedInvertedIndex = serializedIndex.invertedIndex, + tokenSetBuilder = new lunr.TokenSet.Builder, + pipeline = lunr.Pipeline.load(serializedIndex.pipeline) + + if (serializedIndex.version != 
lunr.version) { + lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'") + } + + for (var i = 0; i < serializedVectors.length; i++) { + var tuple = serializedVectors[i], + ref = tuple[0], + elements = tuple[1] + + fieldVectors[ref] = new lunr.Vector(elements) + } + + for (var i = 0; i < serializedInvertedIndex.length; i++) { + var tuple = serializedInvertedIndex[i], + term = tuple[0], + posting = tuple[1] + + tokenSetBuilder.insert(term) + invertedIndex[term] = posting + } + + tokenSetBuilder.finish() + + attrs.fields = serializedIndex.fields + + attrs.fieldVectors = fieldVectors + attrs.invertedIndex = invertedIndex + attrs.tokenSet = tokenSetBuilder.root + attrs.pipeline = pipeline + + return new lunr.Index(attrs) +} +/*! + * lunr.Builder + * Copyright (C) 2020 Oliver Nightingale + */ + +/** + * lunr.Builder performs indexing on a set of documents and + * returns instances of lunr.Index ready for querying. + * + * All configuration of the index is done via the builder, the + * fields to index, the document reference, the text processing + * pipeline and document scoring parameters are all set on the + * builder before indexing. + * + * @constructor + * @property {string} _ref - Internal reference to the document reference field. + * @property {string[]} _fields - Internal reference to the document fields to index. + * @property {object} invertedIndex - The inverted index maps terms to document fields. + * @property {object} documentTermFrequencies - Keeps track of document term frequencies. + * @property {object} documentLengths - Keeps track of the length of documents added to the index. + * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing. + * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing. 
+ * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index. + * @property {number} documentCount - Keeps track of the total number of documents indexed. + * @property {number} _b - A parameter to control field length normalization, setting this to 0 disabled normalization, 1 fully normalizes field lengths, the default value is 0.75. + * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2. + * @property {number} termIndex - A counter incremented for each unique term, used to identify a terms position in the vector space. + * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index. + */ +lunr.Builder = function () { + this._ref = "id" + this._fields = Object.create(null) + this._documents = Object.create(null) + this.invertedIndex = Object.create(null) + this.fieldTermFrequencies = {} + this.fieldLengths = {} + this.tokenizer = lunr.tokenizer + this.pipeline = new lunr.Pipeline + this.searchPipeline = new lunr.Pipeline + this.documentCount = 0 + this._b = 0.75 + this._k1 = 1.2 + this.termIndex = 0 + this.metadataWhitelist = [] +} + +/** + * Sets the document field used as the document reference. Every document must have this field. + * The type of this field in the document should be a string, if it is not a string it will be + * coerced into a string by calling toString. + * + * The default ref is 'id'. + * + * The ref should _not_ be changed during indexing, it should be set before any documents are + * added to the index. Changing it during indexing can lead to inconsistent results. + * + * @param {string} ref - The name of the reference field in the document. + */ +lunr.Builder.prototype.ref = function (ref) { + this._ref = ref +} + +/** + * A function that is used to extract a field from a document. 
+ * + * Lunr expects a field to be at the top level of a document, if however the field + * is deeply nested within a document an extractor function can be used to extract + * the right field for indexing. + * + * @callback fieldExtractor + * @param {object} doc - The document being added to the index. + * @returns {?(string|object|object[])} obj - The object that will be indexed for this field. + * @example Extracting a nested field + * function (doc) { return doc.nested.field } + */ + +/** + * Adds a field to the list of document fields that will be indexed. Every document being + * indexed should have this field. Null values for this field in indexed documents will + * not cause errors but will limit the chance of that document being retrieved by searches. + * + * All fields should be added before adding documents to the index. Adding fields after + * a document has been indexed will have no effect on already indexed documents. + * + * Fields can be boosted at build time. This allows terms within that field to have more + * importance when ranking search results. Use a field boost to specify that matches within + * one field are more important than other fields. + * + * @param {string} fieldName - The name of a field to index in all documents. + * @param {object} attributes - Optional attributes associated with this field. + * @param {number} [attributes.boost=1] - Boost applied to all terms within this field. + * @param {fieldExtractor} [attributes.extractor] - Function to extract a field from a document. + * @throws {RangeError} fieldName cannot contain unsupported characters '/' + */ +lunr.Builder.prototype.field = function (fieldName, attributes) { + if (/\//.test(fieldName)) { + throw new RangeError ("Field '" + fieldName + "' contains illegal character '/'") + } + + this._fields[fieldName] = attributes || {} +} + +/** + * A parameter to tune the amount of field length normalisation that is applied when + * calculating relevance scores. 
A value of 0 will completely disable any normalisation + * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b + * will be clamped to the range 0 - 1. + * + * @param {number} number - The value to set for this tuning parameter. + */ +lunr.Builder.prototype.b = function (number) { + if (number < 0) { + this._b = 0 + } else if (number > 1) { + this._b = 1 + } else { + this._b = number + } +} + +/** + * A parameter that controls the speed at which a rise in term frequency results in term + * frequency saturation. The default value is 1.2. Setting this to a higher value will give + * slower saturation levels, a lower value will result in quicker saturation. + * + * @param {number} number - The value to set for this tuning parameter. + */ +lunr.Builder.prototype.k1 = function (number) { + this._k1 = number +} + +/** + * Adds a document to the index. + * + * Before adding fields to the index the index should have been fully setup, with the document + * ref and all fields to index already having been specified. + * + * The document must have a field name as specified by the ref (by default this is 'id') and + * it should have all fields defined for indexing, though null or undefined values will not + * cause errors. + * + * Entire documents can be boosted at build time. Applying a boost to a document indicates that + * this document should rank higher in search results than other documents. + * + * @param {object} doc - The document to add to the index. + * @param {object} attributes - Optional attributes associated with this document. + * @param {number} [attributes.boost=1] - Boost applied to all terms within this document. 
+ */ +lunr.Builder.prototype.add = function (doc, attributes) { + var docRef = doc[this._ref], + fields = Object.keys(this._fields) + + this._documents[docRef] = attributes || {} + this.documentCount += 1 + + for (var i = 0; i < fields.length; i++) { + var fieldName = fields[i], + extractor = this._fields[fieldName].extractor, + field = extractor ? extractor(doc) : doc[fieldName], + tokens = this.tokenizer(field, { + fields: [fieldName] + }), + terms = this.pipeline.run(tokens), + fieldRef = new lunr.FieldRef (docRef, fieldName), + fieldTerms = Object.create(null) + + this.fieldTermFrequencies[fieldRef] = fieldTerms + this.fieldLengths[fieldRef] = 0 + + // store the length of this field for this document + this.fieldLengths[fieldRef] += terms.length + + // calculate term frequencies for this field + for (var j = 0; j < terms.length; j++) { + var term = terms[j] + + if (fieldTerms[term] == undefined) { + fieldTerms[term] = 0 + } + + fieldTerms[term] += 1 + + // add to inverted index + // create an initial posting if one doesn't exist + if (this.invertedIndex[term] == undefined) { + var posting = Object.create(null) + posting["_index"] = this.termIndex + this.termIndex += 1 + + for (var k = 0; k < fields.length; k++) { + posting[fields[k]] = Object.create(null) + } + + this.invertedIndex[term] = posting + } + + // add an entry for this term/fieldName/docRef to the invertedIndex + if (this.invertedIndex[term][fieldName][docRef] == undefined) { + this.invertedIndex[term][fieldName][docRef] = Object.create(null) + } + + // store all whitelisted metadata about this token in the + // inverted index + for (var l = 0; l < this.metadataWhitelist.length; l++) { + var metadataKey = this.metadataWhitelist[l], + metadata = term.metadata[metadataKey] + + if (this.invertedIndex[term][fieldName][docRef][metadataKey] == undefined) { + this.invertedIndex[term][fieldName][docRef][metadataKey] = [] + } + + this.invertedIndex[term][fieldName][docRef][metadataKey].push(metadata) + } + } 
+ + } +} + +/** + * Calculates the average document length for this index + * + * @private + */ +lunr.Builder.prototype.calculateAverageFieldLengths = function () { + + var fieldRefs = Object.keys(this.fieldLengths), + numberOfFields = fieldRefs.length, + accumulator = {}, + documentsWithField = {} + + for (var i = 0; i < numberOfFields; i++) { + var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), + field = fieldRef.fieldName + + documentsWithField[field] || (documentsWithField[field] = 0) + documentsWithField[field] += 1 + + accumulator[field] || (accumulator[field] = 0) + accumulator[field] += this.fieldLengths[fieldRef] + } + + var fields = Object.keys(this._fields) + + for (var i = 0; i < fields.length; i++) { + var fieldName = fields[i] + accumulator[fieldName] = accumulator[fieldName] / documentsWithField[fieldName] + } + + this.averageFieldLength = accumulator +} + +/** + * Builds a vector space model of every document using lunr.Vector + * + * @private + */ +lunr.Builder.prototype.createFieldVectors = function () { + var fieldVectors = {}, + fieldRefs = Object.keys(this.fieldTermFrequencies), + fieldRefsLength = fieldRefs.length, + termIdfCache = Object.create(null) + + for (var i = 0; i < fieldRefsLength; i++) { + var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), + fieldName = fieldRef.fieldName, + fieldLength = this.fieldLengths[fieldRef], + fieldVector = new lunr.Vector, + termFrequencies = this.fieldTermFrequencies[fieldRef], + terms = Object.keys(termFrequencies), + termsLength = terms.length + + + var fieldBoost = this._fields[fieldName].boost || 1, + docBoost = this._documents[fieldRef.docRef].boost || 1 + + for (var j = 0; j < termsLength; j++) { + var term = terms[j], + tf = termFrequencies[term], + termIndex = this.invertedIndex[term]._index, + idf, score, scoreWithPrecision + + if (termIdfCache[term] === undefined) { + idf = lunr.idf(this.invertedIndex[term], this.documentCount) + termIdfCache[term] = idf + } else { + idf = 
termIdfCache[term] + } + + score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[fieldName])) + tf) + score *= fieldBoost + score *= docBoost + scoreWithPrecision = Math.round(score * 1000) / 1000 + // Converts 1.23456789 to 1.234. + // Reducing the precision so that the vectors take up less + // space when serialised. Doing it now so that they behave + // the same before and after serialisation. Also, this is + // the fastest approach to reducing a number's precision in + // JavaScript. + + fieldVector.insert(termIndex, scoreWithPrecision) + } + + fieldVectors[fieldRef] = fieldVector + } + + this.fieldVectors = fieldVectors +} + +/** + * Creates a token set of all tokens in the index using lunr.TokenSet + * + * @private + */ +lunr.Builder.prototype.createTokenSet = function () { + this.tokenSet = lunr.TokenSet.fromArray( + Object.keys(this.invertedIndex).sort() + ) +} + +/** + * Builds the index, creating an instance of lunr.Index. + * + * This completes the indexing process and should only be called + * once all documents have been added to the index. + * + * @returns {lunr.Index} + */ +lunr.Builder.prototype.build = function () { + this.calculateAverageFieldLengths() + this.createFieldVectors() + this.createTokenSet() + + return new lunr.Index({ + invertedIndex: this.invertedIndex, + fieldVectors: this.fieldVectors, + tokenSet: this.tokenSet, + fields: Object.keys(this._fields), + pipeline: this.searchPipeline + }) +} + +/** + * Applies a plugin to the index builder. + * + * A plugin is a function that is called with the index builder as its context. + * Plugins can be used to customise or extend the behaviour of the index + * in some way. A plugin is just a function, that encapsulated the custom + * behaviour that should be applied when building the index. + * + * The plugin function will be called with the index builder as its argument, additional + * arguments can also be passed when calling use. 
The function will be called + * with the index builder as its context. + * + * @param {Function} plugin The plugin to apply. + */ +lunr.Builder.prototype.use = function (fn) { + var args = Array.prototype.slice.call(arguments, 1) + args.unshift(this) + fn.apply(this, args) +} +/** + * Contains and collects metadata about a matching document. + * A single instance of lunr.MatchData is returned as part of every + * lunr.Index~Result. + * + * @constructor + * @param {string} term - The term this match data is associated with + * @param {string} field - The field in which the term was found + * @param {object} metadata - The metadata recorded about this term in this field + * @property {object} metadata - A cloned collection of metadata associated with this document. + * @see {@link lunr.Index~Result} + */ +lunr.MatchData = function (term, field, metadata) { + var clonedMetadata = Object.create(null), + metadataKeys = Object.keys(metadata || {}) + + // Cloning the metadata to prevent the original + // being mutated during match data combination. + // Metadata is kept in an array within the inverted + // index so cloning the data can be done with + // Array#slice + for (var i = 0; i < metadataKeys.length; i++) { + var key = metadataKeys[i] + clonedMetadata[key] = metadata[key].slice() + } + + this.metadata = Object.create(null) + + if (term !== undefined) { + this.metadata[term] = Object.create(null) + this.metadata[term][field] = clonedMetadata + } +} + +/** + * An instance of lunr.MatchData will be created for every term that matches a + * document. However only one instance is required in a lunr.Index~Result. This + * method combines metadata from another instance of lunr.MatchData with this + * objects metadata. + * + * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one. 
+ * @see {@link lunr.Index~Result} + */ +lunr.MatchData.prototype.combine = function (otherMatchData) { + var terms = Object.keys(otherMatchData.metadata) + + for (var i = 0; i < terms.length; i++) { + var term = terms[i], + fields = Object.keys(otherMatchData.metadata[term]) + + if (this.metadata[term] == undefined) { + this.metadata[term] = Object.create(null) + } + + for (var j = 0; j < fields.length; j++) { + var field = fields[j], + keys = Object.keys(otherMatchData.metadata[term][field]) + + if (this.metadata[term][field] == undefined) { + this.metadata[term][field] = Object.create(null) + } + + for (var k = 0; k < keys.length; k++) { + var key = keys[k] + + if (this.metadata[term][field][key] == undefined) { + this.metadata[term][field][key] = otherMatchData.metadata[term][field][key] + } else { + this.metadata[term][field][key] = this.metadata[term][field][key].concat(otherMatchData.metadata[term][field][key]) + } + + } + } + } +} + +/** + * Add metadata for a term/field pair to this instance of match data. 
+ * + * @param {string} term - The term this match data is associated with + * @param {string} field - The field in which the term was found + * @param {object} metadata - The metadata recorded about this term in this field + */ +lunr.MatchData.prototype.add = function (term, field, metadata) { + if (!(term in this.metadata)) { + this.metadata[term] = Object.create(null) + this.metadata[term][field] = metadata + return + } + + if (!(field in this.metadata[term])) { + this.metadata[term][field] = metadata + return + } + + var metadataKeys = Object.keys(metadata) + + for (var i = 0; i < metadataKeys.length; i++) { + var key = metadataKeys[i] + + if (key in this.metadata[term][field]) { + this.metadata[term][field][key] = this.metadata[term][field][key].concat(metadata[key]) + } else { + this.metadata[term][field][key] = metadata[key] + } + } +} +/** + * A lunr.Query provides a programmatic way of defining queries to be performed + * against a {@link lunr.Index}. + * + * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method + * so the query object is pre-initialized with the right index fields. + * + * @constructor + * @property {lunr.Query~Clause[]} clauses - An array of query clauses. + * @property {string[]} allFields - An array of all available fields in a lunr.Index. + */ +lunr.Query = function (allFields) { + this.clauses = [] + this.allFields = allFields +} + +/** + * Constants for indicating what kind of automatic wildcard insertion will be used when constructing a query clause. + * + * This allows wildcards to be added to the beginning and end of a term without having to manually do any string + * concatenation. + * + * The wildcard constants can be bitwise combined to select both leading and trailing wildcards. 
+ * + * @constant + * @default + * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour + * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists + * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists + * @see lunr.Query~Clause + * @see lunr.Query#clause + * @see lunr.Query#term + * @example query term with trailing wildcard + * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING }) + * @example query term with leading and trailing wildcard + * query.term('foo', { + * wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING + * }) + */ + +lunr.Query.wildcard = new String ("*") +lunr.Query.wildcard.NONE = 0 +lunr.Query.wildcard.LEADING = 1 +lunr.Query.wildcard.TRAILING = 2 + +/** + * Constants for indicating what kind of presence a term must have in matching documents. + * + * @constant + * @enum {number} + * @see lunr.Query~Clause + * @see lunr.Query#clause + * @see lunr.Query#term + * @example query term with required presence + * query.term('foo', { presence: lunr.Query.presence.REQUIRED }) + */ +lunr.Query.presence = { + /** + * Term's presence in a document is optional, this is the default value. + */ + OPTIONAL: 1, + + /** + * Term's presence in a document is required, documents that do not contain + * this term will not be returned. + */ + REQUIRED: 2, + + /** + * Term's presence in a document is prohibited, documents that do contain + * this term will not be returned. + */ + PROHIBITED: 3 +} + +/** + * A single clause in a {@link lunr.Query} contains a term and details on how to + * match that term against a {@link lunr.Index}. + * + * @typedef {Object} lunr.Query~Clause + * @property {string[]} fields - The fields in an index this clause should be matched against. + * @property {number} [boost=1] - Any boost that should be applied when matching this clause. 
+ * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be. + * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline. + * @property {number} [wildcard=lunr.Query.wildcard.NONE] - Whether the term should have wildcards appended or prepended. + * @property {number} [presence=lunr.Query.presence.OPTIONAL] - The terms presence in any matching documents. + */ + +/** + * Adds a {@link lunr.Query~Clause} to this query. + * + * Unless the clause contains the fields to be matched all fields will be matched. In addition + * a default boost of 1 is applied to the clause. + * + * @param {lunr.Query~Clause} clause - The clause to add to this query. + * @see lunr.Query~Clause + * @returns {lunr.Query} + */ +lunr.Query.prototype.clause = function (clause) { + if (!('fields' in clause)) { + clause.fields = this.allFields + } + + if (!('boost' in clause)) { + clause.boost = 1 + } + + if (!('usePipeline' in clause)) { + clause.usePipeline = true + } + + if (!('wildcard' in clause)) { + clause.wildcard = lunr.Query.wildcard.NONE + } + + if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) { + clause.term = "*" + clause.term + } + + if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) { + clause.term = "" + clause.term + "*" + } + + if (!('presence' in clause)) { + clause.presence = lunr.Query.presence.OPTIONAL + } + + this.clauses.push(clause) + + return this +} + +/** + * A negated query is one in which every clause has a presence of + * prohibited. These queries require some special processing to return + * the expected results. 
+ * + * @returns boolean + */ +lunr.Query.prototype.isNegated = function () { + for (var i = 0; i < this.clauses.length; i++) { + if (this.clauses[i].presence != lunr.Query.presence.PROHIBITED) { + return false + } + } + + return true +} + +/** + * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause} + * to the list of clauses that make up this query. + * + * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion + * to a token or token-like string should be done before calling this method. + * + * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an + * array, each term in the array will share the same options. + * + * @param {object|object[]} term - The term(s) to add to the query. + * @param {object} [options] - Any additional properties to add to the query clause. + * @returns {lunr.Query} + * @see lunr.Query#clause + * @see lunr.Query~Clause + * @example adding a single term to a query + * query.term("foo") + * @example adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard + * query.term("foo", { + * fields: ["title"], + * boost: 10, + * wildcard: lunr.Query.wildcard.TRAILING + * }) + * @example using lunr.tokenizer to convert a string to tokens before using them as terms + * query.term(lunr.tokenizer("foo bar")) + */ +lunr.Query.prototype.term = function (term, options) { + if (Array.isArray(term)) { + term.forEach(function (t) { this.term(t, lunr.utils.clone(options)) }, this) + return this + } + + var clause = options || {} + clause.term = term.toString() + + this.clause(clause) + + return this +} +lunr.QueryParseError = function (message, start, end) { + this.name = "QueryParseError" + this.message = message + this.start = start + this.end = end +} + +lunr.QueryParseError.prototype = new Error +lunr.QueryLexer = function (str) { + this.lexemes = [] + this.str = str + this.length 
= str.length + this.pos = 0 + this.start = 0 + this.escapeCharPositions = [] +} + +lunr.QueryLexer.prototype.run = function () { + var state = lunr.QueryLexer.lexText + + while (state) { + state = state(this) + } +} + +lunr.QueryLexer.prototype.sliceString = function () { + var subSlices = [], + sliceStart = this.start, + sliceEnd = this.pos + + for (var i = 0; i < this.escapeCharPositions.length; i++) { + sliceEnd = this.escapeCharPositions[i] + subSlices.push(this.str.slice(sliceStart, sliceEnd)) + sliceStart = sliceEnd + 1 + } + + subSlices.push(this.str.slice(sliceStart, this.pos)) + this.escapeCharPositions.length = 0 + + return subSlices.join('') +} + +lunr.QueryLexer.prototype.emit = function (type) { + this.lexemes.push({ + type: type, + str: this.sliceString(), + start: this.start, + end: this.pos + }) + + this.start = this.pos +} + +lunr.QueryLexer.prototype.escapeCharacter = function () { + this.escapeCharPositions.push(this.pos - 1) + this.pos += 1 +} + +lunr.QueryLexer.prototype.next = function () { + if (this.pos >= this.length) { + return lunr.QueryLexer.EOS + } + + var char = this.str.charAt(this.pos) + this.pos += 1 + return char +} + +lunr.QueryLexer.prototype.width = function () { + return this.pos - this.start +} + +lunr.QueryLexer.prototype.ignore = function () { + if (this.start == this.pos) { + this.pos += 1 + } + + this.start = this.pos +} + +lunr.QueryLexer.prototype.backup = function () { + this.pos -= 1 +} + +lunr.QueryLexer.prototype.acceptDigitRun = function () { + var char, charCode + + do { + char = this.next() + charCode = char.charCodeAt(0) + } while (charCode > 47 && charCode < 58) + + if (char != lunr.QueryLexer.EOS) { + this.backup() + } +} + +lunr.QueryLexer.prototype.more = function () { + return this.pos < this.length +} + +lunr.QueryLexer.EOS = 'EOS' +lunr.QueryLexer.FIELD = 'FIELD' +lunr.QueryLexer.TERM = 'TERM' +lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE' +lunr.QueryLexer.BOOST = 'BOOST' +lunr.QueryLexer.PRESENCE = 
'PRESENCE' + +lunr.QueryLexer.lexField = function (lexer) { + lexer.backup() + lexer.emit(lunr.QueryLexer.FIELD) + lexer.ignore() + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexTerm = function (lexer) { + if (lexer.width() > 1) { + lexer.backup() + lexer.emit(lunr.QueryLexer.TERM) + } + + lexer.ignore() + + if (lexer.more()) { + return lunr.QueryLexer.lexText + } +} + +lunr.QueryLexer.lexEditDistance = function (lexer) { + lexer.ignore() + lexer.acceptDigitRun() + lexer.emit(lunr.QueryLexer.EDIT_DISTANCE) + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexBoost = function (lexer) { + lexer.ignore() + lexer.acceptDigitRun() + lexer.emit(lunr.QueryLexer.BOOST) + return lunr.QueryLexer.lexText +} + +lunr.QueryLexer.lexEOS = function (lexer) { + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } +} + +// This matches the separator used when tokenising fields +// within a document. These should match otherwise it is +// not possible to search for some tokens within a document. +// +// It is possible for the user to change the separator on the +// tokenizer so it _might_ clash with any other of the special +// characters already used within the search string, e.g. :. +// +// This means that it is possible to change the separator in +// such a way that makes some words unsearchable using a search +// string. 
+lunr.QueryLexer.termSeparator = lunr.tokenizer.separator + +lunr.QueryLexer.lexText = function (lexer) { + while (true) { + var char = lexer.next() + + if (char == lunr.QueryLexer.EOS) { + return lunr.QueryLexer.lexEOS + } + + // Escape character is '\' + if (char.charCodeAt(0) == 92) { + lexer.escapeCharacter() + continue + } + + if (char == ":") { + return lunr.QueryLexer.lexField + } + + if (char == "~") { + lexer.backup() + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } + return lunr.QueryLexer.lexEditDistance + } + + if (char == "^") { + lexer.backup() + if (lexer.width() > 0) { + lexer.emit(lunr.QueryLexer.TERM) + } + return lunr.QueryLexer.lexBoost + } + + // "+" indicates term presence is required + // checking for length to ensure that only + // leading "+" are considered + if (char == "+" && lexer.width() === 1) { + lexer.emit(lunr.QueryLexer.PRESENCE) + return lunr.QueryLexer.lexText + } + + // "-" indicates term presence is prohibited + // checking for length to ensure that only + // leading "-" are considered + if (char == "-" && lexer.width() === 1) { + lexer.emit(lunr.QueryLexer.PRESENCE) + return lunr.QueryLexer.lexText + } + + if (char.match(lunr.QueryLexer.termSeparator)) { + return lunr.QueryLexer.lexTerm + } + } +} + +lunr.QueryParser = function (str, query) { + this.lexer = new lunr.QueryLexer (str) + this.query = query + this.currentClause = {} + this.lexemeIdx = 0 +} + +lunr.QueryParser.prototype.parse = function () { + this.lexer.run() + this.lexemes = this.lexer.lexemes + + var state = lunr.QueryParser.parseClause + + while (state) { + state = state(this) + } + + return this.query +} + +lunr.QueryParser.prototype.peekLexeme = function () { + return this.lexemes[this.lexemeIdx] +} + +lunr.QueryParser.prototype.consumeLexeme = function () { + var lexeme = this.peekLexeme() + this.lexemeIdx += 1 + return lexeme +} + +lunr.QueryParser.prototype.nextClause = function () { + var completedClause = this.currentClause + 
this.query.clause(completedClause) + this.currentClause = {} +} + +lunr.QueryParser.parseClause = function (parser) { + var lexeme = parser.peekLexeme() + + if (lexeme == undefined) { + return + } + + switch (lexeme.type) { + case lunr.QueryLexer.PRESENCE: + return lunr.QueryParser.parsePresence + case lunr.QueryLexer.FIELD: + return lunr.QueryParser.parseField + case lunr.QueryLexer.TERM: + return lunr.QueryParser.parseTerm + default: + var errorMessage = "expected either a field or a term, found " + lexeme.type + + if (lexeme.str.length >= 1) { + errorMessage += " with value '" + lexeme.str + "'" + } + + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } +} + +lunr.QueryParser.parsePresence = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + switch (lexeme.str) { + case "-": + parser.currentClause.presence = lunr.Query.presence.PROHIBITED + break + case "+": + parser.currentClause.presence = lunr.Query.presence.REQUIRED + break + default: + var errorMessage = "unrecognised presence operator'" + lexeme.str + "'" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + var errorMessage = "expecting term or field, found nothing" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.FIELD: + return lunr.QueryParser.parseField + case lunr.QueryLexer.TERM: + return lunr.QueryParser.parseTerm + default: + var errorMessage = "expecting term or field, found '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseField = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + if (parser.query.allFields.indexOf(lexeme.str) == -1) { + var possibleFields = 
parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(', '), + errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields + + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.fields = [lexeme.str] + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + var errorMessage = "expecting term, found nothing" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + return lunr.QueryParser.parseTerm + default: + var errorMessage = "expecting term, found '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseTerm = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + parser.currentClause.term = lexeme.str.toLowerCase() + + if (lexeme.str.indexOf("*") != -1) { + parser.currentClause.usePipeline = false + } + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + case lunr.QueryLexer.PRESENCE: + parser.nextClause() + return lunr.QueryParser.parsePresence + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseEditDistance = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + var editDistance = parseInt(lexeme.str, 10) + + if 
(isNaN(editDistance)) { + var errorMessage = "edit distance must be numeric" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.editDistance = editDistance + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + case lunr.QueryLexer.PRESENCE: + parser.nextClause() + return lunr.QueryParser.parsePresence + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end) + } +} + +lunr.QueryParser.parseBoost = function (parser) { + var lexeme = parser.consumeLexeme() + + if (lexeme == undefined) { + return + } + + var boost = parseInt(lexeme.str, 10) + + if (isNaN(boost)) { + var errorMessage = "boost must be numeric" + throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end) + } + + parser.currentClause.boost = boost + + var nextLexeme = parser.peekLexeme() + + if (nextLexeme == undefined) { + parser.nextClause() + return + } + + switch (nextLexeme.type) { + case lunr.QueryLexer.TERM: + parser.nextClause() + return lunr.QueryParser.parseTerm + case lunr.QueryLexer.FIELD: + parser.nextClause() + return lunr.QueryParser.parseField + case lunr.QueryLexer.EDIT_DISTANCE: + return lunr.QueryParser.parseEditDistance + case lunr.QueryLexer.BOOST: + return lunr.QueryParser.parseBoost + case lunr.QueryLexer.PRESENCE: + parser.nextClause() + return lunr.QueryParser.parsePresence + default: + var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'" + throw new lunr.QueryParseError (errorMessage, 
nextLexeme.start, nextLexeme.end) + } +} + + /** + * export the module via AMD, CommonJS or as a browser global + * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js + */ + ;(function (root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(factory) + } else if (typeof exports === 'object') { + /** + * Node. Does not work with strict CommonJS, but + * only CommonJS-like environments that support module.exports, + * like Node. + */ + module.exports = factory() + } else { + // Browser globals (root is window) + root.lunr = factory() + } + }(this, function () { + /** + * Just return a value to define the module export. + * This example returns an object, but the module + * can return a function as the exported value. + */ + return lunr + })) +})(); diff --git a/search/main.js b/search/main.js new file mode 100644 index 00000000..a5e469d7 --- /dev/null +++ b/search/main.js @@ -0,0 +1,109 @@ +function getSearchTermFromLocation() { + var sPageURL = window.location.search.substring(1); + var sURLVariables = sPageURL.split('&'); + for (var i = 0; i < sURLVariables.length; i++) { + var sParameterName = sURLVariables[i].split('='); + if (sParameterName[0] == 'q') { + return decodeURIComponent(sParameterName[1].replace(/\+/g, '%20')); + } + } +} + +function joinUrl (base, path) { + if (path.substring(0, 1) === "/") { + // path starts with `/`. Thus it is absolute. 
+ return path; + } + if (base.substring(base.length-1) === "/") { + // base ends with `/` + return base + path; + } + return base + "/" + path; +} + +function escapeHtml (value) { + return value.replace(/&/g, '&') + .replace(/"/g, '"') + .replace(//g, '>'); +} + +function formatResult (location, title, summary) { + return ''; +} + +function displayResults (results) { + var search_results = document.getElementById("mkdocs-search-results"); + while (search_results.firstChild) { + search_results.removeChild(search_results.firstChild); + } + if (results.length > 0){ + for (var i=0; i < results.length; i++){ + var result = results[i]; + var html = formatResult(result.location, result.title, result.summary); + search_results.insertAdjacentHTML('beforeend', html); + } + } else { + var noResultsText = search_results.getAttribute('data-no-results-text'); + if (!noResultsText) { + noResultsText = "No results found"; + } + search_results.insertAdjacentHTML('beforeend', '

' + noResultsText + '

'); + } +} + +function doSearch () { + var query = document.getElementById('mkdocs-search-query').value; + if (query.length > min_search_length) { + if (!window.Worker) { + displayResults(search(query)); + } else { + searchWorker.postMessage({query: query}); + } + } else { + // Clear results for short queries + displayResults([]); + } +} + +function initSearch () { + var search_input = document.getElementById('mkdocs-search-query'); + if (search_input) { + search_input.addEventListener("keyup", doSearch); + } + var term = getSearchTermFromLocation(); + if (term) { + search_input.value = term; + doSearch(); + } +} + +function onWorkerMessage (e) { + if (e.data.allowSearch) { + initSearch(); + } else if (e.data.results) { + var results = e.data.results; + displayResults(results); + } else if (e.data.config) { + min_search_length = e.data.config.min_search_length-1; + } +} + +if (!window.Worker) { + console.log('Web Worker API not supported'); + // load index in main thread + $.getScript(joinUrl(base_url, "search/worker.js")).done(function () { + console.log('Loaded worker'); + init(); + window.postMessage = function (msg) { + onWorkerMessage({data: msg}); + }; + }).fail(function (jqxhr, settings, exception) { + console.error('Could not load worker.js'); + }); +} else { + // Wrap search in a web worker + var searchWorker = new Worker(joinUrl(base_url, "search/worker.js")); + searchWorker.postMessage({init: true}); + searchWorker.onmessage = onWorkerMessage; +} diff --git a/search/search_index.json b/search/search_index.json new file mode 100644 index 00000000..8d0764e6 --- /dev/null +++ b/search/search_index.json @@ -0,0 +1 @@ +{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Quinn Pyspark helper methods to maximize developer productivity. Quinn provides DataFrame validation functions, useful column functions / DataFrame transformations, and performant helper functions. 
Setup Quinn is uploaded to PyPi and can be installed with this command: pip install quinn Quinn Helper Functions import quinn DataFrame Validations validate_presence_of_columns() quinn.validate_presence_of_columns(source_df, [\"name\", \"age\", \"fun\"]) Raises an exception unless source_df contains the name , age , and fun column. validate_schema() quinn.validate_schema(source_df, required_schema) Raises an exception unless source_df contains all the StructFields defined in the required_schema . validate_absence_of_columns() quinn.validate_absence_of_columns(source_df, [\"age\", \"cool\"]) Raises an exception if source_df contains age or cool columns. Functions single_space() actual_df = source_df.withColumn( \"words_single_spaced\", quinn.single_space(col(\"words\")) ) Replaces all multispaces with single spaces (e.g. changes \"this has some\" to \"this has some\" . remove_all_whitespace() actual_df = source_df.withColumn( \"words_without_whitespace\", quinn.remove_all_whitespace(col(\"words\")) ) Removes all whitespace in a string (e.g. changes \"this has some\" to \"thishassome\" . anti_trim() actual_df = source_df.withColumn( \"words_anti_trimmed\", quinn.anti_trim(col(\"words\")) ) Removes all inner whitespace, but doesn't delete leading or trailing whitespace (e.g. changes \" this has some \" to \" thishassome \" . remove_non_word_characters() actual_df = source_df.withColumn( \"words_without_nonword_chars\", quinn.remove_non_word_characters(col(\"words\")) ) Removes all non-word characters from a string (e.g. changes \"si%$#@!#$!@#mpsons\" to \"simpsons\" . multi_equals() source_df.withColumn( \"are_s1_and_s2_cat\", quinn.multi_equals(\"cat\")(col(\"s1\"), col(\"s2\")) ) multi_equals returns true if s1 and s2 are both equal to \"cat\" . approx_equal() This function takes 3 arguments which are 2 Pyspark DataFrames and one integer values as threshold, and returns the Boolean column which tells if the columns are equal in the threshold. 
let the columns be col1 = [1.2, 2.5, 3.1, 4.0, 5.5] col2 = [1.3, 2.3, 3.0, 3.9, 5.6] threshold = 0.2 result = approx_equal(col(\"col1\"), col(\"col2\"), threshold) result.show() +-----+ |value| +-----+ | true| |false| | true| | true| | true| +-----+ array_choice() This function takes a Column as a parameter and returns a PySpark column that contains a random value from the input column parameter df = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], [\"values\"]) result = df.select(array_choice(col(\"values\"))) The output is := +--------------+ |array_choice()| +--------------+ | 2| +--------------+ regexp_extract_all() The regexp_extract_all takes 2 parameters String s and regexp which is a regular expression. This function finds all the matches for the string which satisfies the regular expression. print(regexp_extract_all(\"this is a example text message for testing application\",r\"\\b\\w*a\\w*\\b\")) The output is := ['a', 'example', 'message', 'application'] Where r\"\\b\\w*a\\w*\\b\" pattern checks for words containing letter a week_start_date() It takes 2 parameters, column and week_start_day. It returns a Spark Dataframe column which contains the start date of the week. By default the week_start_day is set to \"Sun\". For input [\"2023-03-05\", \"2023-03-06\", \"2023-03-07\", \"2023-03-08\"] the Output is result = df.select(\"date\", week_start_date(col(\"date\"), \"Sun\")) result.show() +----------+----------------+ | date|week_start_date | +----------+----------------+ |2023-03-05| 2023-03-05| |2023-03-07| 2023-03-05| |2023-03-08| 2023-03-05| +----------+----------------+ week_end_date() It also takes 2 Paramters as Column and week_end_day, and returns the dateframe column which contains the end date of the week. 
By default the week_end_day is set to \"sat\" +---------+-------------+ date|week_end_date| +---------+-------------+ 2023-03-05| 2023-03-05| 2023-03-07| 2023-03-12| 2023-03-08| 2023-03-12| +---------+-------------+ uuid5() This function generates UUIDv5 in string form from the passed column and optionally namespace and optional extra salt. By default namespace is NAMESPACE_DNS UUID and no extra string used to reduce hash collisions. df = spark.createDataFrame([(\"lorem\",), (\"ipsum\",)], [\"values\"]) result = df.select(quinn.uuid5(F.col(\"values\")).alias(\"uuid5\")) result.show(truncate=False) The output is := +------------------------------------+ |uuid5 | +------------------------------------+ |35482fda-c10a-5076-8da2-dc7bf22d6be4| |51b79c1d-d06c-5b30-a5c6-1fadcd3b2103| +------------------------------------+ Transformations snake_case_col_names() quinn.snake_case_col_names(source_df) Converts all the column names in a DataFrame to snake_case. It's annoying to write SQL queries when columns aren't snake cased. sort_columns() quinn.sort_columns(df=source_df, sort_order=\"asc\", sort_nested=True) Sorts the DataFrame columns in alphabetical order, including nested columns if sort_nested is set to True. Wide DataFrames are easier to navigate when they're sorted alphabetically. DataFrame Helpers column_to_list() quinn.column_to_list(source_df, \"name\") Converts a column in a DataFrame to a list of values. two_columns_to_dictionary() quinn.two_columns_to_dictionary(source_df, \"name\", \"age\") Converts two columns of a DataFrame into a dictionary. In this example, name is the key and age is the value. to_list_of_dictionaries() quinn.to_list_of_dictionaries(source_df) Converts an entire DataFrame into a list of dictionaries. show_output_to_df() quinn.show_output_to_df(output_str, spark) Parses a spark DataFrame output string into a spark DataFrame. Useful for quickly pulling data from a log into a DataFrame. 
In this example, output_str is a string of the form: +----+---+-----------+------+ |name|age| stuff1|stuff2| +----+---+-----------+------+ |jose| 1|nice person| yoyo| | li| 2|nice person| yoyo| | liz| 3|nice person| yoyo| +----+---+-----------+------+ Schema Helpers schema_from_csv() quinn.schema_from_csv(\"schema.csv\") Converts a CSV file into a PySpark schema (aka StructType ). The CSV must contain the column name and type. The nullable and metadata columns are optional. Here's an example CSV file: name,type person,string address,string phoneNumber,string age,int Here's how to convert that CSV file to a PySpark schema: schema = schema_from_csv(spark, \"some_file.csv\") StructType([ StructField(\"person\", StringType(), True), StructField(\"address\", StringType(), True), StructField(\"phoneNumber\", StringType(), True), StructField(\"age\", IntegerType(), True), ]) Here's a more complex CSV file: name,type,nullable,metadata person,string,false,{\"description\":\"The person's name\"} address,string phoneNumber,string,TRUE,{\"description\":\"The person's phone number\"} age,int,False Here's how to read this CSV file into a PySpark schema: another_schema = schema_from_csv(spark, \"some_file.csv\") StructType([ StructField(\"person\", StringType(), False, {\"description\": \"The person's name\"}), StructField(\"address\", StringType(), True), StructField(\"phoneNumber\", StringType(), True, {\"description\": \"The person's phone number\"}), StructField(\"age\", IntegerType(), False), ]) print_schema_as_code() fields = [ StructField(\"simple_int\", IntegerType()), StructField(\"decimal_with_nums\", DecimalType(19, 8)), StructField(\"array\", ArrayType(FloatType())) ] schema = StructType(fields) printable_schema: str = quinn.print_schema_as_code(schema) Converts a Spark DataType to a string of Python code that can be evaluated as code using eval(). 
If the DataType is a StructType , this can be used to print an existing schema in a format that can be copy-pasted into a Python script, log to a file, etc. For example: print(printable_schema) StructType( fields=[ StructField(\"simple_int\", IntegerType(), True), StructField(\"decimal_with_nums\", DecimalType(19, 8), True), StructField( \"array\", ArrayType(FloatType()), True, ), ] ) Once evaluated, the printable schema is a valid schema that can be used in dataframe creation, validation, etc. from chispa.schema_comparer import assert_basic_schema_equality parsed_schema = eval(printable_schema) assert_basic_schema_equality(parsed_schema, schema) # passes print_schema_as_code() can also be used to print other DataType objects. ArrayType array_type = ArrayType(FloatType()) printable_type: str = quinn.print_schema_as_code(array_type) print(printable_type) ``` ``` ArrayType(FloatType()) ``` `MapType` ```python map_type = MapType(StringType(), FloatType()) printable_type: str = quinn.print_schema_as_code(map_type) print(printable_type) ``` ``` MapType( StringType(), FloatType(), True, ) ``` `IntegerType`, `StringType` etc. ```python integer_type = IntegerType() printable_type: str = quinn.print_schema_as_code(integer_type) print(printable_type) ``` ``` IntegerType() ``` ## Pyspark Core Class Extensions from quinn.extensions import * ### Column Extensions **isFalsy()** ```python source_df.withColumn(\"is_stuff_falsy\", F.col(\"has_stuff\").isFalsy()) Returns True if has_stuff is None or False . isTruthy() source_df.withColumn(\"is_stuff_truthy\", F.col(\"has_stuff\").isTruthy()) Returns True unless has_stuff is None or False . isNullOrBlank() source_df.withColumn(\"is_blah_null_or_blank\", F.col(\"blah\").isNullOrBlank()) Returns True if blah is null or blank (the empty string or a string that only contains whitespace). 
isNotIn() source_df.withColumn(\"is_not_bobs_hobby\", F.col(\"fun_thing\").isNotIn(bobs_hobbies)) Returns True if fun_thing is not included in the bobs_hobbies list. nullBetween() source_df.withColumn(\"is_between\", F.col(\"age\").nullBetween(F.col(\"lower_age\"), F.col(\"upper_age\"))) Returns True if age is between lower_age and upper_age . If lower_age is populated and upper_age is null , it will return True if age is greater than or equal to lower_age . If lower_age is null and upper_age is populate, it will return True if age is lower than or equal to upper_age . Contributing We are actively looking for feature requests, pull requests, and bug fixes. Any developer that demonstrates excellence will be invited to be a maintainer of the project. Code Style We are using PySpark code-style and sphinx as docstrings format. For more details about sphinx format see this tutorial . A short example of sphinx -formated docstring is placed below: \"\"\"[Summary] :param [ParamName]: [ParamDescription], defaults to [DefaultParamVal] :type [ParamName]: [ParamType](, optional) ... :raises [ErrorType]: [ErrorDescription] ... :return: [ReturnDescription] :rtype: [ReturnType] \"\"\"","title":"Quin"},{"location":"#quinn","text":"Pyspark helper methods to maximize developer productivity. Quinn provides DataFrame validation functions, useful column functions / DataFrame transformations, and performant helper functions.","title":"Quinn"},{"location":"#setup","text":"Quinn is uploaded to PyPi and can be installed with this command: pip install quinn","title":"Setup"},{"location":"#quinn-helper-functions","text":"import quinn","title":"Quinn Helper Functions"},{"location":"#dataframe-validations","text":"validate_presence_of_columns() quinn.validate_presence_of_columns(source_df, [\"name\", \"age\", \"fun\"]) Raises an exception unless source_df contains the name , age , and fun column. 
validate_schema() quinn.validate_schema(source_df, required_schema) Raises an exception unless source_df contains all the StructFields defined in the required_schema . validate_absence_of_columns() quinn.validate_absence_of_columns(source_df, [\"age\", \"cool\"]) Raises an exception if source_df contains age or cool columns.","title":"DataFrame Validations"},{"location":"#functions","text":"single_space() actual_df = source_df.withColumn( \"words_single_spaced\", quinn.single_space(col(\"words\")) ) Replaces all multispaces with single spaces (e.g. changes \"this has some\" to \"this has some\" . remove_all_whitespace() actual_df = source_df.withColumn( \"words_without_whitespace\", quinn.remove_all_whitespace(col(\"words\")) ) Removes all whitespace in a string (e.g. changes \"this has some\" to \"thishassome\" . anti_trim() actual_df = source_df.withColumn( \"words_anti_trimmed\", quinn.anti_trim(col(\"words\")) ) Removes all inner whitespace, but doesn't delete leading or trailing whitespace (e.g. changes \" this has some \" to \" thishassome \" . remove_non_word_characters() actual_df = source_df.withColumn( \"words_without_nonword_chars\", quinn.remove_non_word_characters(col(\"words\")) ) Removes all non-word characters from a string (e.g. changes \"si%$#@!#$!@#mpsons\" to \"simpsons\" . multi_equals() source_df.withColumn( \"are_s1_and_s2_cat\", quinn.multi_equals(\"cat\")(col(\"s1\"), col(\"s2\")) ) multi_equals returns true if s1 and s2 are both equal to \"cat\" . approx_equal() This function takes 3 arguments which are 2 Pyspark DataFrames and one integer values as threshold, and returns the Boolean column which tells if the columns are equal in the threshold. 
Let the columns be col1 = [1.2, 2.5, 3.1, 4.0, 5.5] col2 = [1.3, 2.3, 3.0, 3.9, 5.6] threshold = 0.2 result = approx_equal(col(\"col1\"), col(\"col2\"), threshold) result.show() +-----+ |value| +-----+ | true| |false| | true| | true| | true| +-----+ array_choice() This function takes a Column as a parameter and returns a PySpark column that contains a random value from the input column parameter. df = spark.createDataFrame([(1,), (2,), (3,), (4,), (5,)], [\"values\"]) result = df.select(array_choice(col(\"values\"))) The output is := +--------------+ |array_choice()| +--------------+ | 2| +--------------+ regexp_extract_all() regexp_extract_all takes 2 parameters: a string s and regexp, which is a regular expression. This function finds all the matches in the string that satisfy the regular expression. print(regexp_extract_all(\"this is a example text message for testing application\",r\"\\b\\w*a\\w*\\b\")) The output is := ['a', 'example', 'message', 'application'] where the pattern r\"\\b\\w*a\\w*\\b\" matches words containing the letter a. week_start_date() It takes 2 parameters, column and week_start_day. It returns a Spark DataFrame column which contains the start date of the week. By default the week_start_day is set to \"Sun\". For input [\"2023-03-05\", \"2023-03-06\", \"2023-03-07\", \"2023-03-08\"] the output is result = df.select(\"date\", week_start_date(col(\"date\"), \"Sun\")) result.show() +----------+----------------+ | date|week_start_date | +----------+----------------+ |2023-03-05| 2023-03-05| |2023-03-07| 2023-03-05| |2023-03-08| 2023-03-05| +----------+----------------+ week_end_date() It also takes 2 parameters, column and week_end_day, and returns a Spark DataFrame column which contains the end date of the week. 
By default the week_end_day is set to \"sat\" +---------+-------------+ date|week_end_date| +---------+-------------+ 2023-03-05| 2023-03-05| 2023-03-07| 2023-03-12| 2023-03-08| 2023-03-12| +---------+-------------+ uuid5() This function generates UUIDv5 in string form from the passed column and optionally namespace and optional extra salt. By default namespace is NAMESPACE_DNS UUID and no extra string used to reduce hash collisions. df = spark.createDataFrame([(\"lorem\",), (\"ipsum\",)], [\"values\"]) result = df.select(quinn.uuid5(F.col(\"values\")).alias(\"uuid5\")) result.show(truncate=False) The output is := +------------------------------------+ |uuid5 | +------------------------------------+ |35482fda-c10a-5076-8da2-dc7bf22d6be4| |51b79c1d-d06c-5b30-a5c6-1fadcd3b2103| +------------------------------------+","title":"Functions"},{"location":"#transformations","text":"snake_case_col_names() quinn.snake_case_col_names(source_df) Converts all the column names in a DataFrame to snake_case. It's annoying to write SQL queries when columns aren't snake cased. sort_columns() quinn.sort_columns(df=source_df, sort_order=\"asc\", sort_nested=True) Sorts the DataFrame columns in alphabetical order, including nested columns if sort_nested is set to True. Wide DataFrames are easier to navigate when they're sorted alphabetically.","title":"Transformations"},{"location":"#dataframe-helpers","text":"column_to_list() quinn.column_to_list(source_df, \"name\") Converts a column in a DataFrame to a list of values. two_columns_to_dictionary() quinn.two_columns_to_dictionary(source_df, \"name\", \"age\") Converts two columns of a DataFrame into a dictionary. In this example, name is the key and age is the value. to_list_of_dictionaries() quinn.to_list_of_dictionaries(source_df) Converts an entire DataFrame into a list of dictionaries. show_output_to_df() quinn.show_output_to_df(output_str, spark) Parses a spark DataFrame output string into a spark DataFrame. 
Useful for quickly pulling data from a log into a DataFrame. In this example, output_str is a string of the form: +----+---+-----------+------+ |name|age| stuff1|stuff2| +----+---+-----------+------+ |jose| 1|nice person| yoyo| | li| 2|nice person| yoyo| | liz| 3|nice person| yoyo| +----+---+-----------+------+","title":"DataFrame Helpers"},{"location":"#schema-helpers","text":"schema_from_csv() quinn.schema_from_csv(\"schema.csv\") Converts a CSV file into a PySpark schema (aka StructType ). The CSV must contain the column name and type. The nullable and metadata columns are optional. Here's an example CSV file: name,type person,string address,string phoneNumber,string age,int Here's how to convert that CSV file to a PySpark schema: schema = schema_from_csv(spark, \"some_file.csv\") StructType([ StructField(\"person\", StringType(), True), StructField(\"address\", StringType(), True), StructField(\"phoneNumber\", StringType(), True), StructField(\"age\", IntegerType(), True), ]) Here's a more complex CSV file: name,type,nullable,metadata person,string,false,{\"description\":\"The person's name\"} address,string phoneNumber,string,TRUE,{\"description\":\"The person's phone number\"} age,int,False Here's how to read this CSV file into a PySpark schema: another_schema = schema_from_csv(spark, \"some_file.csv\") StructType([ StructField(\"person\", StringType(), False, {\"description\": \"The person's name\"}), StructField(\"address\", StringType(), True), StructField(\"phoneNumber\", StringType(), True, {\"description\": \"The person's phone number\"}), StructField(\"age\", IntegerType(), False), ]) print_schema_as_code() fields = [ StructField(\"simple_int\", IntegerType()), StructField(\"decimal_with_nums\", DecimalType(19, 8)), StructField(\"array\", ArrayType(FloatType())) ] schema = StructType(fields) printable_schema: str = quinn.print_schema_as_code(schema) Converts a Spark DataType to a string of Python code that can be evaluated as code using eval(). 
If the DataType is a StructType , this can be used to print an existing schema in a format that can be copy-pasted into a Python script, logged to a file, etc. For example: print(printable_schema) StructType( fields=[ StructField(\"simple_int\", IntegerType(), True), StructField(\"decimal_with_nums\", DecimalType(19, 8), True), StructField( \"array\", ArrayType(FloatType()), True, ), ] ) Once evaluated, the printable schema is a valid schema that can be used in dataframe creation, validation, etc. from chispa.schema_comparer import assert_basic_schema_equality parsed_schema = eval(printable_schema) assert_basic_schema_equality(parsed_schema, schema) # passes print_schema_as_code() can also be used to print other DataType objects. ArrayType array_type = ArrayType(FloatType()) printable_type: str = quinn.print_schema_as_code(array_type) print(printable_type) ArrayType(FloatType()) MapType map_type = MapType(StringType(), FloatType()) printable_type: str = quinn.print_schema_as_code(map_type) print(printable_type) MapType( StringType(), FloatType(), True, ) IntegerType , StringType etc. integer_type = IntegerType() printable_type: str = quinn.print_schema_as_code(integer_type) print(printable_type) IntegerType() Pyspark Core Class Extensions from quinn.extensions import * Column Extensions isFalsy() source_df.withColumn(\"is_stuff_falsy\", F.col(\"has_stuff\").isFalsy()) Returns True if has_stuff is None or False . isTruthy() source_df.withColumn(\"is_stuff_truthy\", F.col(\"has_stuff\").isTruthy()) Returns True unless has_stuff is None or False . isNullOrBlank() source_df.withColumn(\"is_blah_null_or_blank\", F.col(\"blah\").isNullOrBlank()) Returns True if blah is null or blank (the empty string or a string that only contains whitespace). 
isNotIn() source_df.withColumn(\"is_not_bobs_hobby\", F.col(\"fun_thing\").isNotIn(bobs_hobbies)) Returns True if fun_thing is not included in the bobs_hobbies list. nullBetween() source_df.withColumn(\"is_between\", F.col(\"age\").nullBetween(F.col(\"lower_age\"), F.col(\"upper_age\"))) Returns True if age is between lower_age and upper_age . If lower_age is populated and upper_age is null , it will return True if age is greater than or equal to lower_age . If lower_age is null and upper_age is populated, it will return True if age is lower than or equal to upper_age .","title":"Schema Helpers"},{"location":"#contributing","text":"We are actively looking for feature requests, pull requests, and bug fixes. Any developer that demonstrates excellence will be invited to be a maintainer of the project.","title":"Contributing"},{"location":"#code-style","text":"We are using PySpark code-style and sphinx as the docstring format. For more details about sphinx format see this tutorial . A short example of sphinx -formatted docstring is placed below: \"\"\"[Summary] :param [ParamName]: [ParamDescription], defaults to [DefaultParamVal] :type [ParamName]: [ParamType](, optional) ... :raises [ErrorType]: [ErrorDescription] ... :return: [ReturnDescription] :rtype: [ReturnType] \"\"\"","title":"Code Style"},{"location":"reference/SUMMARY/","text":"quinn append_if_schema_identical dataframe_helpers dataframe_validator extensions column_ext dataframe_ext spark_session_ext functions schema_helpers spark split_columns transformations","title":"API Docs"},{"location":"reference/quinn/","text":"quinn API. DataFrameMissingColumnError Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingColumnError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\" DataFrameMissingStructFieldError Bases: ValueError Raise this when there's a DataFrame column error. 
Source code in quinn/dataframe_validator.py class DataFrameMissingStructFieldError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\" DataFrameProhibitedColumnError Bases: ValueError Raise this when a DataFrame includes prohibited columns. Source code in quinn/dataframe_validator.py class DataFrameProhibitedColumnError(ValueError): \"\"\"Raise this when a DataFrame includes prohibited columns.\"\"\" anti_trim(col) Remove whitespace from the boundaries of col using the regexp_replace function. Parameters: Name Type Description Default col Column Column on which to perform the regexp_replace. required Returns: Type Description Column A new Column with all whitespace removed from the boundaries. Source code in quinn/functions.py def anti_trim(col: Column) -> Column: \"\"\"Remove whitespace from the boundaries of ``col`` using the regexp_replace function. :param col: Column on which to perform the regexp_replace. :type col: Column :return: A new Column with all whitespace removed from the boundaries. :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\b\\\\s+\\\\b\", \"\") approx_equal(col1, col2, threshold) Compare two Column objects by checking if the difference between them is less than a specified threshold . Parameters: Name Type Description Default col1 Column the first Column required col2 Column the second Column required threshold Number value to compare with required Returns: Type Description Column Boolean Column with True indicating that abs(col1 - col2) is less than threshold Source code in quinn/functions.py def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column: \"\"\"Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``. 
:param col1: the first ``Column`` :type col1: Column :param col2: the second ``Column`` :type col2: Column :param threshold: value to compare with :type threshold: Number :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 - col2)`` is less than ``threshold`` \"\"\" return F.abs(col1 - col2) < threshold business_days_between(start_date, end_date) Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date. Parameters: Name Type Description Default start_date Column The column with the start dates required end_date Column The column with the end dates required Returns: Type Description Column a Column with the number of business days between the start and the end date Source code in quinn/functions.py def business_days_between(start_date: Column, end_date: Column) -> Column: # noqa: ARG001 \"\"\"Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date. :param start_date: The column with the start dates :type start_date: Column :param end_date: The column with the end dates :type end_date: Column :returns: a Column with the number of business days between the start and the end date :rtype: Column \"\"\" all_days = \"sequence(start_date, end_date)\" days_of_week = f\"transform({all_days}, x -> date_format(x, 'E'))\" filter_weekends = F.expr(f\"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))\") num_business_days = F.size(filter_weekends) - 1 return F.when(num_business_days < 0, None).otherwise(num_business_days) column_to_list(df, col_name) Collect column to list of values. Parameters: Name Type Description Default df DataFrame Input DataFrame required col_name str Column to collect required Returns: Type Description List[Any] List of values Source code in quinn/dataframe_helpers.py def column_to_list(df: DataFrame, col_name: str) -> list[Any]: \"\"\"Collect column to list of values. 
:param df: Input DataFrame :type df: pyspark.sql.DataFrame :param col_name: Column to collect :type col_name: str :return: List of values :rtype: List[Any] \"\"\" return [x[col_name] for x in df.select(col_name).collect()] create_df(spark, rows_data, col_specs) Create a new DataFrame from the given data and column specs. The returned DataFrame s created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default spark SparkSession SparkSession object required rows_data array-like the data used to create the DataFrame required col_specs list of tuples list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/dataframe_helpers.py def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001 \"\"\"Create a new DataFrame from the given data and column specs. The returned DataFrame s created using the StructType and StructField classes provided by PySpark. :param spark: SparkSession object :type spark: SparkSession :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417 return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields)) exists(f) Create a user-defined function. It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] Callable function - A callable function that takes an element of type Any and returns a boolean value. 
required Returns: Type Description UserDefinedFunction A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Source code in quinn/functions.py def exists(f: Callable[[Any], bool]) -> udf: \"\"\"Create a user-defined function. It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :param f: Callable function - A callable function that takes an element of type Any and returns a boolean value. :return: A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return any(map(f, list_)) return F.udf(temp_udf, BooleanType()) forall(f) The forall function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] A callable function f which takes in any type and returns a boolean required Returns: Type Description UserDefinedFunction A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. 
Source code in quinn/functions.py def forall(f: Callable[[Any], bool]) -> udf: \"\"\"The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. :param f: A callable function ``f`` which takes in any type and returns a boolean :return: A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return all(map(f, list_)) return F.udf(temp_udf, BooleanType()) multi_equals(value) Create a user-defined function that checks if all the given columns have the designated value. Parameters: Name Type Description Default value Any The designated value. required Returns: Type Description UserDifinedFunction A user-defined function of type BooleanType(). Source code in quinn/functions.py def multi_equals(value: Any) -> udf: # noqa: ANN401 \"\"\"Create a user-defined function that checks if all the given columns have the designated value. :param value: The designated value. :type value: Any :return: A user-defined function of type BooleanType(). :rtype: UserDifinedFunction \"\"\" def temp_udf(*cols) -> bool: # noqa: ANN002 return all(map(lambda col: col == value, cols)) # noqa: C417 return F.udf(temp_udf, BooleanType()) print_athena_create_table(df, athena_table_name, s3location) Generate the Athena create table statement for a given DataFrame. 
Parameters: Name Type Description Default df DataFrame The pyspark.sql.DataFrame to use required athena_table_name str The name of the athena table to generate required s3location str The S3 location of the parquet data required Returns: Type Description None None Source code in quinn/dataframe_helpers.py def print_athena_create_table( df: DataFrame, athena_table_name: str, s3location: str, ) -> None: \"\"\"Generate the Athena create table statement for a given DataFrame. :param df: The pyspark.sql.DataFrame to use :param athena_table_name: The name of the athena table to generate :param s3location: The S3 location of the parquet data :return: None \"\"\" fields = df.schema print(f\"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( \") for field in fields.fieldNames()[:-1]: print(\"\\t\", f\"`{fields[field].name}` {fields[field].dataType.simpleString()}, \") last = fields[fields.fieldNames()[-1]] print(\"\\t\", f\"`{last.name}` {last.dataType.simpleString()} \") print(\")\") print(\"STORED AS PARQUET\") print(f\"LOCATION '{s3location}'\\n\") print_schema_as_code(dtype) Represent DataType (including StructType) as valid Python code. Parameters: Name Type Description Default dtype T . DataType The input DataType or Schema object required Returns: Type Description str A valid python code which generate the same schema. Source code in quinn/schema_helpers.py def print_schema_as_code(dtype: T.DataType) -> str: \"\"\"Represent DataType (including StructType) as valid Python code. :param dtype: The input DataType or Schema object :type dtype: pyspark.sql.types.DataType :return: A valid python code which generate the same schema. 
:rtype: str \"\"\" res = [] if isinstance(dtype, T.StructType): res.append(\"StructType(\\n\\tfields=[\") for field in dtype.fields: for line in _repr_column(field).split(\"\\n\"): res.append(\"\\n\\t\\t\") res.append(line) res.append(\",\") res.append(\"\\n\\t]\\n)\") elif isinstance(dtype, T.ArrayType): res.append(\"ArrayType(\") res.append(print_schema_as_code(dtype.elementType)) res.append(\")\") elif isinstance(dtype, T.MapType): res.append(\"MapType(\") res.append(f\"\\n\\t{print_schema_as_code(dtype.keyType)},\") for line in print_schema_as_code(dtype.valueType).split(\"\\n\"): res.append(\"\\n\\t\") res.append(line) res.append(\",\") res.append(f\"\\n\\t{dtype.valueContainsNull},\") res.append(\"\\n)\") elif isinstance(dtype, T.DecimalType): res.append(f\"DecimalType({dtype.precision}, {dtype.scale})\") elif str(dtype).endswith(\"()\"): # PySpark 3.3+ res.append(str(dtype)) else: res.append(f\"{dtype}()\") return \"\".join(res) regexp_extract_all(s, regexp) Function uses the Python re library to extract regular expressions from a string ( s ) using a regex pattern ( regexp ). It returns a list of all matches, or None if s is None . Parameters: Name Type Description Default s Column input string ( Column ) required regexp Column string re pattern required Source code in quinn/functions.py @F.udf(returnType=ArrayType(StringType())) def regexp_extract_all(s: Column, regexp: Column) -> Column: \"\"\"Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`). It returns a list of all matches, or `None` if `s` is `None`. :param s: input string (`Column`) :type s: str :param regexp: string `re` pattern :rtype: Column \"\"\" return None if s is None else re.findall(regexp, s) remove_all_whitespace(col) Function takes a Column object as a parameter and returns a Column object with all white space removed. 
It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. Parameters: Name Type Description Default col Column a Column object required Returns: Type Description Column a Column object with all white space removed Source code in quinn/functions.py def remove_all_whitespace(col: Column) -> Column: \"\"\"Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. :param col: a `Column` object :type col: Column :returns: a `Column` object with all white space removed :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\s+\", \"\") remove_non_word_characters(col) Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression \"[^\\\\w\\\\s]+\" . This expression represents any character that is not a word character (e.g. \\\\w ) or whitespace ( \\\\s ). Parameters: Name Type Description Default col Column A Column object. required Returns: Type Description Column A Column object with non-word characters removed. Source code in quinn/functions.py def remove_non_word_characters(col: Column) -> Column: r\"\"\"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression ``\"[^\\\\w\\\\s]+\"``. This expression represents any character that is not a word character (e.g. `\\\\w`) or whitespace (`\\\\s`). :param col: A Column object. :return: A Column object with non-word characters removed. \"\"\" return F.regexp_replace(col, \"[^\\\\w\\\\s]+\", \"\") show_output_to_df(show_output, spark) Show output as spark DataFrame. 
Parameters: Name Type Description Default show_output str String representing output of 'show' command in spark required spark SparkSession SparkSession object required Returns: Type Description Dataframe DataFrame object containing output of a show command in spark Source code in quinn/dataframe_helpers.py def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame: \"\"\"Show output as spark DataFrame. :param show_output: String representing output of 'show' command in spark :type show_output: str :param spark: SparkSession object :type spark: SparkSession :return: DataFrame object containing output of a show command in spark :rtype: Dataframe \"\"\" lines = show_output.split(\"\\n\") ugly_column_names = lines[1] pretty_column_names = [i.strip() for i in ugly_column_names[1:-1].split(\"|\")] pretty_data = [] ugly_data = lines[3:-1] for row in ugly_data: r = [i.strip() for i in row[1:-1].split(\"|\")] pretty_data.append(tuple(r)) return spark.createDataFrame(pretty_data, pretty_column_names) single_space(col) Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. Parameters: Name Type Description Default col Column The column which needs to be spaced required Returns: Type Description Column A trimmed column with single space Source code in quinn/functions.py def single_space(col: Column) -> Column: \"\"\"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. :param col: The column which needs to be spaced :type col: Column :returns: A trimmed column with single space :rtype: Column \"\"\" return F.trim(F.regexp_replace(col, \" +\", \" \")) snake_case_col_names(df) Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case. (e.g. col_name_1 ). 
It uses the to_snake_case function in conjunction with the with_columns_renamed function to achieve this. Parameters: Name Type Description Default df DataFrame A DataFrame instance to process required Returns: Type Description ``DataFrame``. A DataFrame instance with column names converted to snake case Source code in quinn/transformations.py def snake_case_col_names(df: DataFrame) -> DataFrame: \"\"\"Function takes a ``DataFrame`` instance and returns the same ``DataFrame`` instance with all column names converted to snake case. (e.g. ``col_name_1``). It uses the ``to_snake_case`` function in conjunction with the ``with_columns_renamed`` function to achieve this. :param df: A ``DataFrame`` instance to process :type df: ``DataFrame`` :return: A ``DataFrame`` instance with column names converted to snake case :rtype: ``DataFrame``. \"\"\" return with_columns_renamed(to_snake_case)(df) sort_columns(df, sort_order, sort_nested=False) This function sorts the columns of a given DataFrame based on a given sort order. The sort_order parameter can either be asc or desc , which correspond to ascending and descending order, respectively. If any other value is provided for the sort_order parameter, a ValueError will be raised. Parameters: Name Type Description Default df DataFrame A DataFrame required sort_order str The order in which to sort the columns in the DataFrame required sort_nested bool Whether to sort nested structs or not. Defaults to false. False Returns: Type Description pyspark.sql.DataFrame A DataFrame with the columns sorted in the chosen order Source code in quinn/transformations.py def sort_columns( # noqa: C901,PLR0915 df: DataFrame, sort_order: str, sort_nested: bool = False, ) -> DataFrame: \"\"\"This function sorts the columns of a given DataFrame based on a given sort order. The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to ascending and descending order, respectively. 
If any other value is provided for the ``sort_order`` parameter, a ``ValueError`` will be raised. :param df: A DataFrame :type df: pyspark.sql.DataFrame :param sort_order: The order in which to sort the columns in the DataFrame :type sort_order: str :param sort_nested: Whether to sort nested structs or not. Defaults to false. :type sort_nested: bool :return: A DataFrame with the columns sorted in the chosen order :rtype: pyspark.sql.DataFrame \"\"\" def sort_nested_cols(schema, is_reversed, base_field=\"\") -> list[str]: # noqa: ANN001 # recursively check nested fields and sort them # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark # Credits: @pault for logic def parse_fields( fields_to_sort: list, parent_struct, is_reversed: bool, # noqa: ANN001 ) -> list: sorted_fields: list = sorted( fields_to_sort, key=lambda x: x[\"name\"], reverse=is_reversed, ) results = [] for field in sorted_fields: new_struct = StructType([StructField.fromJson(field)]) new_base_field = parent_struct.name if base_field: new_base_field = base_field + \".\" + new_base_field results.extend( sort_nested_cols(new_struct, is_reversed, base_field=new_base_field), ) return results select_cols = [] for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed): field_type = parent_struct.dataType if isinstance(field_type, ArrayType): array_parent = parent_struct.jsonValue()[\"type\"][\"elementType\"] base_str = f\"transform({parent_struct.name}\" suffix_str = f\") AS {parent_struct.name}\" # if struct in array, create mapping to struct if array_parent[\"type\"] == \"struct\": array_parent = array_parent[\"fields\"] base_str = f\"{base_str}, x -> struct(\" suffix_str = f\"){suffix_str}\" array_elements = parse_fields(array_parent, parent_struct, is_reversed) element_names = [i.split(\".\")[-1] for i in array_elements] array_elements_formatted = [f\"x.{i} as {i}\" for i in element_names] # create a string representation of 
the sorted array # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS phone_numbers result = f\"{base_str}{', '.join(array_elements_formatted)}{suffix_str}\" elif isinstance(field_type, StructType): field_list = parent_struct.jsonValue()[\"type\"][\"fields\"] sub_fields = parse_fields(field_list, parent_struct, is_reversed) # create a string representation of the sorted struct # ex: struct(address.zip.first5, address.zip.last4) AS zip result = f\"struct({', '.join(sub_fields)}) AS {parent_struct.name}\" elif base_field: result = f\"{base_field}.{parent_struct.name}\" else: result = parent_struct.name select_cols.append(result) return select_cols def get_original_nullability(field: StructField, result_dict: dict) -> None: if hasattr(field, \"nullable\"): result_dict[field.name] = field.nullable else: result_dict[field.name] = True if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): result_dict[f\"{field.name}_element\"] = field.dataType.containsNull children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: get_original_nullability(i, result_dict) def fix_nullability(field: StructField, result_dict: dict) -> None: field.nullable = result_dict[field.name] if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): # save the containsNull property of the ArrayType field.dataType.containsNull = result_dict[f\"{field.name}_element\"] children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: fix_nullability(i, result_dict) if sort_order not in [\"asc\", \"desc\"]: msg = f\"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'\" raise ValueError( msg, ) reverse_lookup = { \"asc\": False, \"desc\": True, } is_reversed: bool = 
reverse_lookup[sort_order] top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed)) if not sort_nested: return top_level_sorted_df is_nested: bool = any( isinstance(i.dataType, (StructType, ArrayType)) for i in top_level_sorted_df.schema ) if not is_nested: return top_level_sorted_df fully_sorted_schema = sort_nested_cols(top_level_sorted_df.schema, is_reversed) output = df.selectExpr(fully_sorted_schema) result_dict = {} for field in df.schema: get_original_nullability(field, result_dict) for field in output.schema: fix_nullability(field, result_dict) if not hasattr(SparkSession, \"getActiveSession\"): # spark 2.4 spark = SparkSession.builder.getOrCreate() else: spark = SparkSession.getActiveSession() spark = spark if spark is not None else SparkSession.builder.getOrCreate() return spark.createDataFrame(output.rdd, output.schema) split_col(df, col_name, delimiter, new_col_names, mode='permissive', default=None) Splits the given column based on the delimiter and creates new columns with the split values. Parameters: Name Type Description Default df DataFrame The input DataFrame required col_name str The name of the column to split required delimiter str The delimiter to split the column on required new_col_names list [ str ] A list of two strings for the new column names required mode str The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" 'permissive' default Optional [ str ] If the mode is \"permissive\" then default value will be assigned to column None Returns: Type Description pyspark.sql.DataFrame. dataframe: The resulting DataFrame with the split columns Source code in quinn/split_columns.py def split_col( # noqa: PLR0913 df: DataFrame, col_name: str, delimiter: str, new_col_names: list[str], mode: str = \"permissive\", default: Optional[str] = None, ) -> DataFrame: \"\"\"Splits the given column based on the delimiter and creates new columns with the split values. 
:param df: The input DataFrame :type df: pyspark.sql.DataFrame :param col_name: The name of the column to split :type col_name: str :param delimiter: The delimiter to split the column on :type delimiter: str :param new_col_names: A list of two strings for the new column names :type new_col_names: (List[str]) :param mode: The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" :type mode: str :param default: If the mode is \"permissive\" then default value will be assigned to column :type mode: str :return: dataframe: The resulting DataFrame with the split columns :rtype: pyspark.sql.DataFrame. \"\"\" # Check if the column to be split exists in the DataFrame if col_name not in df.columns: msg = f\"Column '{col_name}' not found in DataFrame.\" raise ValueError(msg) # Check if the delimiter is a string if not isinstance(delimiter, str): msg = \"Delimiter must be a string.\" raise TypeError(msg) # Check if the new column names are a list of strings if not isinstance(new_col_names, list): msg = \"New column names must be a list of strings.\" raise TypeError(msg) # Define a UDF to check the occurrence of delimitter def _num_delimiter(col_value1: str) -> int: # Get the count of delimiter and store the result in no_of_delimiter no_of_delimiter = col_value1.count(delimiter) # Split col_value based on delimiter and store the result in split_value split_value = col_value1.split(delimiter) # Check if col_value is not None if col_value1 is not None: # Check if the no of delimiters in split_value is not as expected if no_of_delimiter != len(new_col_names) - 1: # If the length is not same, raise an IndexError with the message mentioning the expected and found length msg = f\"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements\" raise IndexError( msg, ) # If the length of split_value is same as new_col_names, check if any of the split values is None or empty string elif any( # noqa: RET506 x is None or 
x.strip() == \"\" for x in split_value[: len(new_col_names)] ): msg = \"Null or empty values are not accepted for columns in strict mode\" raise ValueError( msg, ) # If the above checks pass, return the count of delimiter return int(no_of_delimiter) # If col_value is None, return 0 return 0 num_udf = udf(lambda y: None if y is None else _num_delimiter(y), IntegerType()) # Get the column expression for the column to be split col_expr = df[col_name] # Split the column by the delimiter split_col_expr = split(trim(col_expr), delimiter) # Check the split mode if mode == \"strict\": # Create an array of select expressions to create new columns from the split values select_exprs = [ when(split_col_expr.getItem(i) != \"\", split_col_expr.getItem(i)).alias( new_col_names[i], ) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns df = df.select(\"*\", *select_exprs) # noqa: PD901 df = df.withColumn(\"del_length\", num_udf(df[col_name])) # noqa: PD901 df.cache() # Drop the original column if the new columns were created successfully df = df.select([c for c in df.columns if c not in {\"del_length\", col_name}]) # noqa: PD901 elif mode == \"permissive\": # Create an array of select expressions to create new columns from the split values # Use the default value if a split value is missing or empty select_exprs = select_exprs = [ when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i)) .otherwise(default) .alias(new_col_names[i]) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns # Drop the original column if the new columns were created successfully df = df.select(\"*\", *select_exprs).drop(col_name) # noqa: PD901 df.cache() else: msg = f\"Invalid mode: {mode}\" raise ValueError(msg) # Return the DataFrame with the split columns return df to_list_of_dictionaries(df) Convert a Spark DataFrame to a list of dictionaries. 
Parameters: Name Type Description Default df DataFrame The Spark DataFrame to convert. required Returns: Type Description List[Dict[str, Any]] A list of dictionaries representing the rows in the DataFrame. Source code in quinn/dataframe_helpers.py def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]: \"\"\"Convert a Spark DataFrame to a list of dictionaries. :param df: The Spark DataFrame to convert. :type df: :py:class:`pyspark.sql.DataFrame` :return: A list of dictionaries representing the rows in the DataFrame. :rtype: List[Dict[str, Any]] \"\"\" return list(map(lambda r: r.asDict(), df.collect())) # noqa: C417 to_snake_case(s) Takes a string and converts it to snake case format. Parameters: Name Type Description Default s str The string to be converted. required Returns: Type Description str The string in snake case format. Source code in quinn/transformations.py def to_snake_case(s: str) -> str: \"\"\"Takes a string and converts it to snake case format. :param s: The string to be converted. :type s: str :return: The string in snake case format. :rtype: str \"\"\" return s.lower().replace(\" \", \"_\") two_columns_to_dictionary(df, key_col_name, value_col_name) Collect two columns as dictionary when first column is key and second is value. Parameters: Name Type Description Default df DataFrame Input DataFrame required key_col_name str Key-column required value_col_name str Value-column required Returns: Type Description Dict[str, Any] Dictionary with values Source code in quinn/dataframe_helpers.py def two_columns_to_dictionary( df: DataFrame, key_col_name: str, value_col_name: str, ) -> dict[str, Any]: \"\"\"Collect two columns as dictionary when first column is key and second is value. 
:param df: Input DataFrame :type df: pyspark.sql.DataFrame :param key_col_name: Key-column :type key_col_name: str :param value_col_name: Value-column :type value_col_name: str :return: Dictionary with values :rtype: Dict[str, Any] \"\"\" k, v = key_col_name, value_col_name return {x[k]: x[v] for x in df.select(k, v).collect()} uuid5(col, namespace=uuid.NAMESPACE_DNS, extra_string='') Function generates UUIDv5 from col and namespace , optionally prepending an extra string to col . Sets variant to RFC 4122 one. Parameters: Name Type Description Default col Column Column that will be hashed. required namespace uuid . UUID Namespace to be used. (default: uuid.NAMESPACE_DNS ) uuid.NAMESPACE_DNS extra_string str In case of collisions one can pass an extra string to hash on. '' Returns: Type Description Column String representation of generated UUIDv5 Source code in quinn/functions.py def uuid5( col: Column, namespace: uuid.UUID = uuid.NAMESPACE_DNS, extra_string: str = \"\", ) -> Column: \"\"\"Function generates UUIDv5 from ``col`` and ``namespace``, optionally prepending an extra string to ``col``. Sets variant to RFC 4122 one. :param col: Column that will be hashed. :type col: Column :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`) :type namespace: str :param extra_string: In case of collisions one can pass an extra string to hash on. :type extra_string: str :return: String representation of generated UUIDv5 :rtype: Column \"\"\" ns = F.lit(namespace.bytes) salted_col = F.concat(F.lit(extra_string), col) encoded = F.encode(salted_col, \"utf-8\") encoded_with_ns = F.concat(ns, encoded) hashed = F.sha1(encoded_with_ns) variant_part = F.substring(hashed, 17, 4) variant_part = F.conv(variant_part, 16, 2) variant_part = F.lpad(variant_part, 16, \"0\") variant_part = F.concat( F.lit(\"10\"), F.substring(variant_part, 3, 16), ) # RFC 4122 variant. 
variant_part = F.lower(F.conv(variant_part, 2, 16)) return F.concat_ws( \"-\", F.substring(hashed, 1, 8), F.substring(hashed, 9, 4), F.concat(F.lit(\"5\"), F.substring(hashed, 14, 3)), # Set version. variant_part, F.substring(hashed, 21, 12), ) validate_absence_of_columns(df, prohibited_col_names) Validate that none of the prohibited column names are present among specified DataFrame columns. Parameters: Name Type Description Default df DataFrame DataFrame containing columns to be checked. required prohibited_col_names list [ str ] List of prohibited column names. required Raises: Type Description DataFrameProhibitedColumnError If the prohibited column names are present among the specified DataFrame columns. Source code in quinn/dataframe_validator.py def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None: \"\"\"Validate that none of the prohibited column names are present among specified DataFrame columns. :param df: DataFrame containing columns to be checked. :param prohibited_col_names: List of prohibited column names. :raises DataFrameProhibitedColumnError: If the prohibited column names are present among the specified DataFrame columns. \"\"\" all_col_names = df.columns extra_col_names = [x for x in all_col_names if x in prohibited_col_names] error_message = f\"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}\" if extra_col_names: raise DataFrameProhibitedColumnError(error_message) validate_presence_of_columns(df, required_col_names) Validate the presence of column names in a DataFrame. Parameters: Name Type Description Default df DataFrame A spark DataFrame. required required_col_names list [ str ] List of the required column names for the DataFrame. required Returns: Type Description None None. Raises: Type Description DataFrameMissingColumnError if any of the requested column names are not present in the DataFrame. 
Source code in quinn/dataframe_validator.py def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None: \"\"\"Validate the presence of column names in a DataFrame. :param df: A spark DataFrame. :type df: DataFrame` :param required_col_names: List of the required column names for the DataFrame. :type required_col_names: :py:class:`list` of :py:class:`str` :return: None. :raises DataFrameMissingColumnError: if any of the requested column names are not present in the DataFrame. \"\"\" all_col_names = df.columns missing_col_names = [x for x in required_col_names if x not in all_col_names] error_message = f\"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}\" if missing_col_names: raise DataFrameMissingColumnError(error_message) validate_schema(df, required_schema, ignore_nullable=False) Function that validate if a given DataFrame has a given StructType as its schema. Parameters: Name Type Description Default df DataFrame DataFrame to validate required required_schema StructType StructType required for the DataFrame required ignore_nullable bool (Optional) A flag for if nullable fields should be ignored during validation False Raises: Type Description DataFrameMissingStructFieldError if any StructFields from the required schema are not included in the DataFrame schema Source code in quinn/dataframe_validator.py def validate_schema( df: DataFrame, required_schema: StructType, ignore_nullable: bool = False, ) -> None: \"\"\"Function that validate if a given DataFrame has a given StructType as its schema. 
:param df: DataFrame to validate :type df: DataFrame :param required_schema: StructType required for the DataFrame :type required_schema: StructType :param ignore_nullable: (Optional) A flag for if nullable fields should be ignored during validation :type ignore_nullable: bool, optional :raises DataFrameMissingStructFieldError: if any StructFields from the required schema are not included in the DataFrame schema \"\"\" _all_struct_fields = copy.deepcopy(df.schema) _required_schema = copy.deepcopy(required_schema) if ignore_nullable: for x in _all_struct_fields: x.nullable = None for x in _required_schema: x.nullable = None missing_struct_fields = [x for x in _required_schema if x not in _all_struct_fields] error_message = f\"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {_all_struct_fields}\" if missing_struct_fields: raise DataFrameMissingStructFieldError(error_message) week_end_date(col, week_end_day='Sat') Return a date column for the end of week for a given day. The Spark function dayofweek considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. Usage of the when and otherwise functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. Parameters: Name Type Description Default col Column The reference date column. required week_end_day str The week end day (default: 'Sat') 'Sat' Returns: Type Description Column A Column of end of the week dates. Source code in quinn/functions.py def week_end_date(col: Column, week_end_day: str = \"Sat\") -> Column: \"\"\"Return a date column for the end of week for a given day. The Spark function `dayofweek` considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. 
Usage of the `when` and `otherwise` functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. :param col: The reference date column. :type col: Column :param week_end_day: The week end day (default: 'Sat') :type week_end_day: str :return: A Column of end of the week dates. :rtype: Column \"\"\" _raise_if_invalid_day(week_end_day) # these are the default Spark mappings. Spark considers Sunday the first day of the week. day_of_week_mapping = { \"Sun\": 1, \"Mon\": 2, \"Tue\": 3, \"Wed\": 4, \"Thu\": 5, \"Fri\": 6, \"Sat\": 7, } return F.when( F.dayofweek(col).eqNullSafe(F.lit(day_of_week_mapping[week_end_day])), col, ).otherwise(F.next_day(col, week_end_day)) week_start_date(col, week_start_day='Sun') Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The week_start_day argument is a string corresponding to the day of the week to start the week from, e.g. \"Mon\" , \"Tue\" , and must be in the set: {\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"} . If the argument given is not a valid day then a ValueError will be raised. Parameters: Name Type Description Default col Column The column to determine start of week dates on required week_start_day str The day to start the week on 'Sun' Returns: Type Description Column A Column with start of week dates Source code in quinn/functions.py def week_start_date(col: Column, week_start_day: str = \"Sun\") -> Column: \"\"\"Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates. 
The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The `week_start_day` argument is a string corresponding to the day of the week to start the week from, e.g. `\"Mon\"`, `\"Tue\"`, and must be in the set: `{\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"}`. If the argument given is not a valid day then a `ValueError` will be raised. :param col: The column to determine start of week dates on :type col: Column :param week_start_day: The day to start the week on :type week_start_day: str :returns: A Column with start of week dates :rtype: Column \"\"\" _raise_if_invalid_day(week_start_day) # the \"standard week\" in Spark is from Sunday to Saturday mapping = { \"Sun\": \"Sat\", \"Mon\": \"Sun\", \"Tue\": \"Mon\", \"Wed\": \"Tue\", \"Thu\": \"Wed\", \"Fri\": \"Thu\", \"Sat\": \"Fri\", } end = week_end_date(col, mapping[week_start_day]) return F.date_add(end, -6) with_columns_renamed(fun) Ffunction designed to rename the columns of a Spark DataFrame . It takes a Callable[[str], str] object as an argument ( fun ) and returns a Callable[[DataFrame], DataFrame] object. When _() is called on a DataFrame , it creates a list of column names, applying the argument fun() to each of them, and returning a new DataFrame with the new column names. Parameters: Name Type Description Default fun Callable [[ str ], str ] Renaming function required Returns: Type Description Callable [[ DataFrame ], DataFrame ] Function which takes DataFrame as parameter. Source code in quinn/transformations.py def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]: \"\"\"Ffunction designed to rename the columns of a `Spark DataFrame`. It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a `Callable[[DataFrame], DataFrame]` object. 
When `_()` is called on a `DataFrame`, it creates a list of column names, applying the argument `fun()` to each of them, and returning a new `DataFrame` with the new column names. :param fun: Renaming function :returns: Function which takes DataFrame as parameter. \"\"\" def _(df: DataFrame) -> DataFrame: cols = [F.col(f\"`{col_name}`\").alias(fun(col_name)) for col_name in df.columns] return df.select(*cols) return _ with_some_columns_renamed(fun, change_col_name) Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame] . Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed. Parameters: Name Type Description Default fun Callable [[ str ], str ] A function that takes a column name as a string and returns a new name as a string. required change_col_name Callable [[ str ], str ] A function that takes a column name as a string and returns a boolean. required Returns: Type Description `Callable[[DataFrame], DataFrame]` A Callable[[DataFrame], DataFrame] , which takes a DataFrame and returns a DataFrame with some of its columns renamed. Source code in quinn/transformations.py def with_some_columns_renamed( fun: Callable[[str], str], change_col_name: Callable[[str], str], ) -> Callable[[DataFrame], DataFrame]: \"\"\"Function that takes a `Callable[[str], str]` and a `Callable[[str], str]` and returns a `Callable[[DataFrame], DataFrame]`. Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :param fun: A function that takes a column name as a string and returns a new name as a string. :type fun: `Callable[[str], str]` :param change_col_name: A function that takes a column name as a string and returns a boolean. :type change_col_name: `Callable[[str], str]` :return: A `Callable[[DataFrame], DataFrame]`, which takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. 
:rtype: `Callable[[DataFrame], DataFrame]` \"\"\" def _(df: DataFrame) -> DataFrame: cols = [ F.col(f\"`{col_name}`\").alias(fun(col_name)) if change_col_name(col_name) else F.col(f\"`{col_name}`\") for col_name in df.columns ] return df.select(*cols) return _","title":"Index"},{"location":"reference/quinn/#quinn.DataFrameMissingColumnError","text":"Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingColumnError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\"","title":"DataFrameMissingColumnError"},{"location":"reference/quinn/#quinn.DataFrameMissingStructFieldError","text":"Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingStructFieldError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\"","title":"DataFrameMissingStructFieldError"},{"location":"reference/quinn/#quinn.DataFrameProhibitedColumnError","text":"Bases: ValueError Raise this when a DataFrame includes prohibited columns. Source code in quinn/dataframe_validator.py class DataFrameProhibitedColumnError(ValueError): \"\"\"Raise this when a DataFrame includes prohibited columns.\"\"\"","title":"DataFrameProhibitedColumnError"},{"location":"reference/quinn/#quinn.anti_trim","text":"Remove whitespace from the boundaries of col using the regexp_replace function. Parameters: Name Type Description Default col Column Column on which to perform the regexp_replace. required Returns: Type Description Column A new Column with all whitespace removed from the boundaries. Source code in quinn/functions.py def anti_trim(col: Column) -> Column: \"\"\"Remove whitespace from the boundaries of ``col`` using the regexp_replace function. :param col: Column on which to perform the regexp_replace. :type col: Column :return: A new Column with all whitespace removed from the boundaries. 
:rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\b\\\\s+\\\\b\", \"\")","title":"anti_trim()"},{"location":"reference/quinn/#quinn.approx_equal","text":"Compare two Column objects by checking if the difference between them is less than a specified threshold . Parameters: Name Type Description Default col1 Column the first Column required col2 Column the second Column required threshold Number value to compare with required Returns: Type Description Column Boolean Column with True indicating that abs(col1 - col2) is less than threshold Source code in quinn/functions.py def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column: \"\"\"Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``. :param col1: the first ``Column`` :type col1: Column :param col2: the second ``Column`` :type col2: Column :param threshold: value to compare with :type threshold: Number :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 - col2)`` is less than ``threshold`` \"\"\" return F.abs(col1 - col2) < threshold","title":"approx_equal()"},{"location":"reference/quinn/#quinn.business_days_between","text":"Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date. Parameters: Name Type Description Default start_date Column The column with the start dates required end_date Column The column with the end dates required Returns: Type Description Column a Column with the number of business days between the start and the end date Source code in quinn/functions.py def business_days_between(start_date: Column, end_date: Column) -> Column: # noqa: ARG001 \"\"\"Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date. 
:param start_date: The column with the start dates :type start_date: Column :param end_date: The column with the end dates :type end_date: Column :returns: a Column with the number of business days between the start and the end date :rtype: Column \"\"\" all_days = \"sequence(start_date, end_date)\" days_of_week = f\"transform({all_days}, x -> date_format(x, 'E'))\" filter_weekends = F.expr(f\"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))\") num_business_days = F.size(filter_weekends) - 1 return F.when(num_business_days < 0, None).otherwise(num_business_days)","title":"business_days_between()"},{"location":"reference/quinn/#quinn.column_to_list","text":"Collect column to list of values. Parameters: Name Type Description Default df DataFrame Input DataFrame required col_name str Column to collect required Returns: Type Description List[Any] List of values Source code in quinn/dataframe_helpers.py def column_to_list(df: DataFrame, col_name: str) -> list[Any]: \"\"\"Collect column to list of values. :param df: Input DataFrame :type df: pyspark.sql.DataFrame :param col_name: Column to collect :type col_name: str :return: List of values :rtype: List[Any] \"\"\" return [x[col_name] for x in df.select(col_name).collect()]","title":"column_to_list()"},{"location":"reference/quinn/#quinn.create_df","text":"Create a new DataFrame from the given data and column specs. The returned DataFrame s created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default spark SparkSession SparkSession object required rows_data array-like the data used to create the DataFrame required col_specs list of tuples list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/dataframe_helpers.py def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001 \"\"\"Create a new DataFrame from the given data and column specs. 
The returned DataFrame s created using the StructType and StructField classes provided by PySpark. :param spark: SparkSession object :type spark: SparkSession :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417 return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields))","title":"create_df()"},{"location":"reference/quinn/#quinn.exists","text":"Create a user-defined function. It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] Callable function - A callable function that takes an element of type Any and returns a boolean value. required Returns: Type Description UserDefinedFunction A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Source code in quinn/functions.py def exists(f: Callable[[Any], bool]) -> udf: \"\"\"Create a user-defined function. It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :param f: Callable function - A callable function that takes an element of type Any and returns a boolean value. 
:return: A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return any(map(f, list_)) return F.udf(temp_udf, BooleanType())","title":"exists()"},{"location":"reference/quinn/#quinn.forall","text":"The forall function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] A callable function f which takes in any type and returns a boolean required Returns: Type Description UserDefinedFunction A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. Source code in quinn/functions.py def forall(f: Callable[[Any], bool]) -> udf: \"\"\"The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. :param f: A callable function ``f`` which takes in any type and returns a boolean :return: A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. 
:rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return all(map(f, list_)) return F.udf(temp_udf, BooleanType())","title":"forall()"},{"location":"reference/quinn/#quinn.multi_equals","text":"Create a user-defined function that checks if all the given columns have the designated value. Parameters: Name Type Description Default value Any The designated value. required Returns: Type Description UserDifinedFunction A user-defined function of type BooleanType(). Source code in quinn/functions.py def multi_equals(value: Any) -> udf: # noqa: ANN401 \"\"\"Create a user-defined function that checks if all the given columns have the designated value. :param value: The designated value. :type value: Any :return: A user-defined function of type BooleanType(). :rtype: UserDifinedFunction \"\"\" def temp_udf(*cols) -> bool: # noqa: ANN002 return all(map(lambda col: col == value, cols)) # noqa: C417 return F.udf(temp_udf, BooleanType())","title":"multi_equals()"},{"location":"reference/quinn/#quinn.print_athena_create_table","text":"Generate the Athena create table statement for a given DataFrame. Parameters: Name Type Description Default df DataFrame The pyspark.sql.DataFrame to use required athena_table_name str The name of the athena table to generate required s3location str The S3 location of the parquet data required Returns: Type Description None None Source code in quinn/dataframe_helpers.py def print_athena_create_table( df: DataFrame, athena_table_name: str, s3location: str, ) -> None: \"\"\"Generate the Athena create table statement for a given DataFrame. 
:param df: The pyspark.sql.DataFrame to use :param athena_table_name: The name of the athena table to generate :param s3location: The S3 location of the parquet data :return: None \"\"\" fields = df.schema print(f\"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( \") for field in fields.fieldNames()[:-1]: print(\"\\t\", f\"`{fields[field].name}` {fields[field].dataType.simpleString()}, \") last = fields[fields.fieldNames()[-1]] print(\"\\t\", f\"`{last.name}` {last.dataType.simpleString()} \") print(\")\") print(\"STORED AS PARQUET\") print(f\"LOCATION '{s3location}'\\n\")","title":"print_athena_create_table()"},{"location":"reference/quinn/#quinn.print_schema_as_code","text":"Represent DataType (including StructType) as valid Python code. Parameters: Name Type Description Default dtype T . DataType The input DataType or Schema object required Returns: Type Description str A valid python code which generate the same schema. Source code in quinn/schema_helpers.py def print_schema_as_code(dtype: T.DataType) -> str: \"\"\"Represent DataType (including StructType) as valid Python code. :param dtype: The input DataType or Schema object :type dtype: pyspark.sql.types.DataType :return: A valid python code which generate the same schema. 
:rtype: str \"\"\" res = [] if isinstance(dtype, T.StructType): res.append(\"StructType(\\n\\tfields=[\") for field in dtype.fields: for line in _repr_column(field).split(\"\\n\"): res.append(\"\\n\\t\\t\") res.append(line) res.append(\",\") res.append(\"\\n\\t]\\n)\") elif isinstance(dtype, T.ArrayType): res.append(\"ArrayType(\") res.append(print_schema_as_code(dtype.elementType)) res.append(\")\") elif isinstance(dtype, T.MapType): res.append(\"MapType(\") res.append(f\"\\n\\t{print_schema_as_code(dtype.keyType)},\") for line in print_schema_as_code(dtype.valueType).split(\"\\n\"): res.append(\"\\n\\t\") res.append(line) res.append(\",\") res.append(f\"\\n\\t{dtype.valueContainsNull},\") res.append(\"\\n)\") elif isinstance(dtype, T.DecimalType): res.append(f\"DecimalType({dtype.precision}, {dtype.scale})\") elif str(dtype).endswith(\"()\"): # PySpark 3.3+ res.append(str(dtype)) else: res.append(f\"{dtype}()\") return \"\".join(res)","title":"print_schema_as_code()"},{"location":"reference/quinn/#quinn.regexp_extract_all","text":"Function uses the Python re library to extract regular expressions from a string ( s ) using a regex pattern ( regexp ). It returns a list of all matches, or None if s is None . Parameters: Name Type Description Default s Column input string ( Column ) required regexp Column string re pattern required Source code in quinn/functions.py @F.udf(returnType=ArrayType(StringType())) def regexp_extract_all(s: Column, regexp: Column) -> Column: \"\"\"Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`). It returns a list of all matches, or `None` if `s` is `None`. 
:param s: input string (`Column`) :type s: str :param regexp: string `re` pattern :rtype: Column \"\"\" return None if s is None else re.findall(regexp, s)","title":"regexp_extract_all()"},{"location":"reference/quinn/#quinn.remove_all_whitespace","text":"Function takes a Column object as a parameter and returns a Column object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. Parameters: Name Type Description Default col Column a Column object required Returns: Type Description Column a Column object with all white space removed Source code in quinn/functions.py def remove_all_whitespace(col: Column) -> Column: \"\"\"Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. :param col: a `Column` object :type col: Column :returns: a `Column` object with all white space removed :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\s+\", \"\")","title":"remove_all_whitespace()"},{"location":"reference/quinn/#quinn.remove_non_word_characters","text":"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression \"[^\\\\w\\\\s]+\" . This expression represents any character that is not a word character (e.g. \\\\w ) or whitespace ( \\\\s ). Parameters: Name Type Description Default col Column A Column object. required Returns: Type Description Column A Column object with non-word characters removed. Source code in quinn/functions.py def remove_non_word_characters(col: Column) -> Column: r\"\"\"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression ``\"[^\\\\w\\\\s]+\"``. This expression represents any character that is not a word character (e.g. 
`\\\\w`) or whitespace (`\\\\s`). :param col: A Column object. :return: A Column object with non-word characters removed. \"\"\" return F.regexp_replace(col, \"[^\\\\w\\\\s]+\", \"\")","title":"remove_non_word_characters()"},{"location":"reference/quinn/#quinn.show_output_to_df","text":"Show output as spark DataFrame. Parameters: Name Type Description Default show_output str String representing output of 'show' command in spark required spark SparkSession SparkSession object required Returns: Type Description Dataframe DataFrame object containing output of a show command in spark Source code in quinn/dataframe_helpers.py def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame: \"\"\"Show output as spark DataFrame. :param show_output: String representing output of 'show' command in spark :type show_output: str :param spark: SparkSession object :type spark: SparkSession :return: DataFrame object containing output of a show command in spark :rtype: Dataframe \"\"\" lines = show_output.split(\"\\n\") ugly_column_names = lines[1] pretty_column_names = [i.strip() for i in ugly_column_names[1:-1].split(\"|\")] pretty_data = [] ugly_data = lines[3:-1] for row in ugly_data: r = [i.strip() for i in row[1:-1].split(\"|\")] pretty_data.append(tuple(r)) return spark.createDataFrame(pretty_data, pretty_column_names)","title":"show_output_to_df()"},{"location":"reference/quinn/#quinn.single_space","text":"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. Parameters: Name Type Description Default col Column The column which needs to be spaced required Returns: Type Description Column A trimmed column with single space Source code in quinn/functions.py def single_space(col: Column) -> Column: \"\"\"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. 
:param col: The column which needs to be spaced :type col: Column :returns: A trimmed column with single space :rtype: Column \"\"\" return F.trim(F.regexp_replace(col, \" +\", \" \"))","title":"single_space()"},{"location":"reference/quinn/#quinn.snake_case_col_names","text":"Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case. (e.g. col_name_1 ). It uses the to_snake_case function in conjunction with the with_columns_renamed function to achieve this. Parameters: Name Type Description Default df DataFrame A DataFrame instance to process required Returns: Type Description ``DataFrame``. A DataFrame instance with column names converted to snake case Source code in quinn/transformations.py def snake_case_col_names(df: DataFrame) -> DataFrame: \"\"\"Function takes a ``DataFrame`` instance and returns the same ``DataFrame`` instance with all column names converted to snake case. (e.g. ``col_name_1``). It uses the ``to_snake_case`` function in conjunction with the ``with_columns_renamed`` function to achieve this. :param df: A ``DataFrame`` instance to process :type df: ``DataFrame`` :return: A ``DataFrame`` instance with column names converted to snake case :rtype: ``DataFrame``. \"\"\" return with_columns_renamed(to_snake_case)(df)","title":"snake_case_col_names()"},{"location":"reference/quinn/#quinn.sort_columns","text":"This function sorts the columns of a given DataFrame based on a given sort order. The sort_order parameter can either be asc or desc , which correspond to ascending and descending order, respectively. If any other value is provided for the sort_order parameter, a ValueError will be raised. Parameters: Name Type Description Default df DataFrame A DataFrame required sort_order str The order in which to sort the columns in the DataFrame required sort_nested bool Whether to sort nested structs or not. Defaults to false. 
False Returns: Type Description pyspark.sql.DataFrame A DataFrame with the columns sorted in the chosen order Source code in quinn/transformations.py def sort_columns( # noqa: C901,PLR0915 df: DataFrame, sort_order: str, sort_nested: bool = False, ) -> DataFrame: \"\"\"This function sorts the columns of a given DataFrame based on a given sort order. The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to ascending and descending order, respectively. If any other value is provided for the ``sort_order`` parameter, a ``ValueError`` will be raised. :param df: A DataFrame :type df: pyspark.sql.DataFrame :param sort_order: The order in which to sort the columns in the DataFrame :type sort_order: str :param sort_nested: Whether to sort nested structs or not. Defaults to false. :type sort_nested: bool :return: A DataFrame with the columns sorted in the chosen order :rtype: pyspark.sql.DataFrame \"\"\" def sort_nested_cols(schema, is_reversed, base_field=\"\") -> list[str]: # noqa: ANN001 # recursively check nested fields and sort them # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark # Credits: @pault for logic def parse_fields( fields_to_sort: list, parent_struct, is_reversed: bool, # noqa: ANN001 ) -> list: sorted_fields: list = sorted( fields_to_sort, key=lambda x: x[\"name\"], reverse=is_reversed, ) results = [] for field in sorted_fields: new_struct = StructType([StructField.fromJson(field)]) new_base_field = parent_struct.name if base_field: new_base_field = base_field + \".\" + new_base_field results.extend( sort_nested_cols(new_struct, is_reversed, base_field=new_base_field), ) return results select_cols = [] for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed): field_type = parent_struct.dataType if isinstance(field_type, ArrayType): array_parent = parent_struct.jsonValue()[\"type\"][\"elementType\"] base_str = f\"transform({parent_struct.name}\" 
suffix_str = f\") AS {parent_struct.name}\" # if struct in array, create mapping to struct if array_parent[\"type\"] == \"struct\": array_parent = array_parent[\"fields\"] base_str = f\"{base_str}, x -> struct(\" suffix_str = f\"){suffix_str}\" array_elements = parse_fields(array_parent, parent_struct, is_reversed) element_names = [i.split(\".\")[-1] for i in array_elements] array_elements_formatted = [f\"x.{i} as {i}\" for i in element_names] # create a string representation of the sorted array # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS phone_numbers result = f\"{base_str}{', '.join(array_elements_formatted)}{suffix_str}\" elif isinstance(field_type, StructType): field_list = parent_struct.jsonValue()[\"type\"][\"fields\"] sub_fields = parse_fields(field_list, parent_struct, is_reversed) # create a string representation of the sorted struct # ex: struct(address.zip.first5, address.zip.last4) AS zip result = f\"struct({', '.join(sub_fields)}) AS {parent_struct.name}\" elif base_field: result = f\"{base_field}.{parent_struct.name}\" else: result = parent_struct.name select_cols.append(result) return select_cols def get_original_nullability(field: StructField, result_dict: dict) -> None: if hasattr(field, \"nullable\"): result_dict[field.name] = field.nullable else: result_dict[field.name] = True if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): result_dict[f\"{field.name}_element\"] = field.dataType.containsNull children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: get_original_nullability(i, result_dict) def fix_nullability(field: StructField, result_dict: dict) -> None: field.nullable = result_dict[field.name] if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): # save the containsNull 
property of the ArrayType field.dataType.containsNull = result_dict[f\"{field.name}_element\"] children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: fix_nullability(i, result_dict) if sort_order not in [\"asc\", \"desc\"]: msg = f\"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'\" raise ValueError( msg, ) reverse_lookup = { \"asc\": False, \"desc\": True, } is_reversed: bool = reverse_lookup[sort_order] top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed)) if not sort_nested: return top_level_sorted_df is_nested: bool = any( isinstance(i.dataType, (StructType, ArrayType)) for i in top_level_sorted_df.schema ) if not is_nested: return top_level_sorted_df fully_sorted_schema = sort_nested_cols(top_level_sorted_df.schema, is_reversed) output = df.selectExpr(fully_sorted_schema) result_dict = {} for field in df.schema: get_original_nullability(field, result_dict) for field in output.schema: fix_nullability(field, result_dict) if not hasattr(SparkSession, \"getActiveSession\"): # spark 2.4 spark = SparkSession.builder.getOrCreate() else: spark = SparkSession.getActiveSession() spark = spark if spark is not None else SparkSession.builder.getOrCreate() return spark.createDataFrame(output.rdd, output.schema)","title":"sort_columns()"},{"location":"reference/quinn/#quinn.split_col","text":"Splits the given column based on the delimiter and creates new columns with the split values. Parameters: Name Type Description Default df DataFrame The input DataFrame required col_name str The name of the column to split required delimiter str The delimiter to split the column on required new_col_names list [ str ] A list of two strings for the new column names required mode str The split mode. Can be \"strict\" or \"permissive\". 
Default is \"permissive\" 'permissive' default Optional [ str ] If the mode is \"permissive\" then default value will be assigned to column None Returns: Type Description pyspark.sql.DataFrame. dataframe: The resulting DataFrame with the split columns Source code in quinn/split_columns.py def split_col( # noqa: PLR0913 df: DataFrame, col_name: str, delimiter: str, new_col_names: list[str], mode: str = \"permissive\", default: Optional[str] = None, ) -> DataFrame: \"\"\"Splits the given column based on the delimiter and creates new columns with the split values. :param df: The input DataFrame :type df: pyspark.sql.DataFrame :param col_name: The name of the column to split :type col_name: str :param delimiter: The delimiter to split the column on :type delimiter: str :param new_col_names: A list of two strings for the new column names :type new_col_names: (List[str]) :param mode: The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" :type mode: str :param default: If the mode is \"permissive\" then default value will be assigned to column :type mode: str :return: dataframe: The resulting DataFrame with the split columns :rtype: pyspark.sql.DataFrame. 
\"\"\" # Check if the column to be split exists in the DataFrame if col_name not in df.columns: msg = f\"Column '{col_name}' not found in DataFrame.\" raise ValueError(msg) # Check if the delimiter is a string if not isinstance(delimiter, str): msg = \"Delimiter must be a string.\" raise TypeError(msg) # Check if the new column names are a list of strings if not isinstance(new_col_names, list): msg = \"New column names must be a list of strings.\" raise TypeError(msg) # Define a UDF to check the occurrence of delimitter def _num_delimiter(col_value1: str) -> int: # Get the count of delimiter and store the result in no_of_delimiter no_of_delimiter = col_value1.count(delimiter) # Split col_value based on delimiter and store the result in split_value split_value = col_value1.split(delimiter) # Check if col_value is not None if col_value1 is not None: # Check if the no of delimiters in split_value is not as expected if no_of_delimiter != len(new_col_names) - 1: # If the length is not same, raise an IndexError with the message mentioning the expected and found length msg = f\"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements\" raise IndexError( msg, ) # If the length of split_value is same as new_col_names, check if any of the split values is None or empty string elif any( # noqa: RET506 x is None or x.strip() == \"\" for x in split_value[: len(new_col_names)] ): msg = \"Null or empty values are not accepted for columns in strict mode\" raise ValueError( msg, ) # If the above checks pass, return the count of delimiter return int(no_of_delimiter) # If col_value is None, return 0 return 0 num_udf = udf(lambda y: None if y is None else _num_delimiter(y), IntegerType()) # Get the column expression for the column to be split col_expr = df[col_name] # Split the column by the delimiter split_col_expr = split(trim(col_expr), delimiter) # Check the split mode if mode == \"strict\": # Create an array of select expressions to 
create new columns from the split values select_exprs = [ when(split_col_expr.getItem(i) != \"\", split_col_expr.getItem(i)).alias( new_col_names[i], ) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns df = df.select(\"*\", *select_exprs) # noqa: PD901 df = df.withColumn(\"del_length\", num_udf(df[col_name])) # noqa: PD901 df.cache() # Drop the original column if the new columns were created successfully df = df.select([c for c in df.columns if c not in {\"del_length\", col_name}]) # noqa: PD901 elif mode == \"permissive\": # Create an array of select expressions to create new columns from the split values # Use the default value if a split value is missing or empty select_exprs = select_exprs = [ when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i)) .otherwise(default) .alias(new_col_names[i]) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns # Drop the original column if the new columns were created successfully df = df.select(\"*\", *select_exprs).drop(col_name) # noqa: PD901 df.cache() else: msg = f\"Invalid mode: {mode}\" raise ValueError(msg) # Return the DataFrame with the split columns return df","title":"split_col()"},{"location":"reference/quinn/#quinn.to_list_of_dictionaries","text":"Convert a Spark DataFrame to a list of dictionaries. Parameters: Name Type Description Default df DataFrame The Spark DataFrame to convert. required Returns: Type Description List[Dict[str, Any]] A list of dictionaries representing the rows in the DataFrame. Source code in quinn/dataframe_helpers.py def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]: \"\"\"Convert a Spark DataFrame to a list of dictionaries. :param df: The Spark DataFrame to convert. :type df: :py:class:`pyspark.sql.DataFrame` :return: A list of dictionaries representing the rows in the DataFrame. 
:rtype: List[Dict[str, Any]] \"\"\" return list(map(lambda r: r.asDict(), df.collect())) # noqa: C417","title":"to_list_of_dictionaries()"},{"location":"reference/quinn/#quinn.to_snake_case","text":"Takes a string and converts it to snake case format. Parameters: Name Type Description Default s str The string to be converted. required Returns: Type Description str The string in snake case format. Source code in quinn/transformations.py def to_snake_case(s: str) -> str: \"\"\"Takes a string and converts it to snake case format. :param s: The string to be converted. :type s: str :return: The string in snake case format. :rtype: str \"\"\" return s.lower().replace(\" \", \"_\")","title":"to_snake_case()"},{"location":"reference/quinn/#quinn.two_columns_to_dictionary","text":"Collect two columns as dictionary when first column is key and second is value. Parameters: Name Type Description Default df DataFrame Input DataFrame required key_col_name str Key-column required value_col_name str Value-column required Returns: Type Description Dict[str, Any] Dictionary with values Source code in quinn/dataframe_helpers.py def two_columns_to_dictionary( df: DataFrame, key_col_name: str, value_col_name: str, ) -> dict[str, Any]: \"\"\"Collect two columns as dictionary when first column is key and second is value. :param df: Input DataFrame :type df: pyspark.sql.DataFrame :param key_col_name: Key-column :type key_col_name: str :param value_col_name: Value-column :type value_col_name: str :return: Dictionary with values :rtype: Dict[str, Any] \"\"\" k, v = key_col_name, value_col_name return {x[k]: x[v] for x in df.select(k, v).collect()}","title":"two_columns_to_dictionary()"},{"location":"reference/quinn/#quinn.uuid5","text":"Function generates UUIDv5 from col and namespace , optionally prepending an extra string to col . Sets variant to RFC 4122 one. Parameters: Name Type Description Default col Column Column that will be hashed. required namespace uuid . 
UUID Namespace to be used. (default: uuid.NAMESPACE_DNS ) uuid.NAMESPACE_DNS extra_string str In case of collisions one can pass an extra string to hash on. '' Returns: Type Description Column String representation of generated UUIDv5 Source code in quinn/functions.py def uuid5( col: Column, namespace: uuid.UUID = uuid.NAMESPACE_DNS, extra_string: str = \"\", ) -> Column: \"\"\"Function generates UUIDv5 from ``col`` and ``namespace``, optionally prepending an extra string to ``col``. Sets variant to RFC 4122 one. :param col: Column that will be hashed. :type col: Column :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`) :type namespace: str :param extra_string: In case of collisions one can pass an extra string to hash on. :type extra_string: str :return: String representation of generated UUIDv5 :rtype: Column \"\"\" ns = F.lit(namespace.bytes) salted_col = F.concat(F.lit(extra_string), col) encoded = F.encode(salted_col, \"utf-8\") encoded_with_ns = F.concat(ns, encoded) hashed = F.sha1(encoded_with_ns) variant_part = F.substring(hashed, 17, 4) variant_part = F.conv(variant_part, 16, 2) variant_part = F.lpad(variant_part, 16, \"0\") variant_part = F.concat( F.lit(\"10\"), F.substring(variant_part, 3, 16), ) # RFC 4122 variant. variant_part = F.lower(F.conv(variant_part, 2, 16)) return F.concat_ws( \"-\", F.substring(hashed, 1, 8), F.substring(hashed, 9, 4), F.concat(F.lit(\"5\"), F.substring(hashed, 14, 3)), # Set version. variant_part, F.substring(hashed, 21, 12), )","title":"uuid5()"},{"location":"reference/quinn/#quinn.validate_absence_of_columns","text":"Validate that none of the prohibited column names are present among specified DataFrame columns. Parameters: Name Type Description Default df DataFrame DataFrame containing columns to be checked. required prohibited_col_names list [ str ] List of prohibited column names. 
required Raises: Type Description DataFrameProhibitedColumnError If the prohibited column names are present among the specified DataFrame columns. Source code in quinn/dataframe_validator.py def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None: \"\"\"Validate that none of the prohibited column names are present among specified DataFrame columns. :param df: DataFrame containing columns to be checked. :param prohibited_col_names: List of prohibited column names. :raises DataFrameProhibitedColumnError: If the prohibited column names are present among the specified DataFrame columns. \"\"\" all_col_names = df.columns extra_col_names = [x for x in all_col_names if x in prohibited_col_names] error_message = f\"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}\" if extra_col_names: raise DataFrameProhibitedColumnError(error_message)","title":"validate_absence_of_columns()"},{"location":"reference/quinn/#quinn.validate_presence_of_columns","text":"Validate the presence of column names in a DataFrame. Parameters: Name Type Description Default df DataFrame A spark DataFrame. required required_col_names list [ str ] List of the required column names for the DataFrame. required Returns: Type Description None None. Raises: Type Description DataFrameMissingColumnError if any of the requested column names are not present in the DataFrame. Source code in quinn/dataframe_validator.py def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None: \"\"\"Validate the presence of column names in a DataFrame. :param df: A spark DataFrame. :type df: DataFrame` :param required_col_names: List of the required column names for the DataFrame. :type required_col_names: :py:class:`list` of :py:class:`str` :return: None. :raises DataFrameMissingColumnError: if any of the requested column names are not present in the DataFrame. 
\"\"\" all_col_names = df.columns missing_col_names = [x for x in required_col_names if x not in all_col_names] error_message = f\"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}\" if missing_col_names: raise DataFrameMissingColumnError(error_message)","title":"validate_presence_of_columns()"},{"location":"reference/quinn/#quinn.validate_schema","text":"Function that validate if a given DataFrame has a given StructType as its schema. Parameters: Name Type Description Default df DataFrame DataFrame to validate required required_schema StructType StructType required for the DataFrame required ignore_nullable bool (Optional) A flag for if nullable fields should be ignored during validation False Raises: Type Description DataFrameMissingStructFieldError if any StructFields from the required schema are not included in the DataFrame schema Source code in quinn/dataframe_validator.py def validate_schema( df: DataFrame, required_schema: StructType, ignore_nullable: bool = False, ) -> None: \"\"\"Function that validate if a given DataFrame has a given StructType as its schema. 
:param df: DataFrame to validate :type df: DataFrame :param required_schema: StructType required for the DataFrame :type required_schema: StructType :param ignore_nullable: (Optional) A flag for if nullable fields should be ignored during validation :type ignore_nullable: bool, optional :raises DataFrameMissingStructFieldError: if any StructFields from the required schema are not included in the DataFrame schema \"\"\" _all_struct_fields = copy.deepcopy(df.schema) _required_schema = copy.deepcopy(required_schema) if ignore_nullable: for x in _all_struct_fields: x.nullable = None for x in _required_schema: x.nullable = None missing_struct_fields = [x for x in _required_schema if x not in _all_struct_fields] error_message = f\"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {_all_struct_fields}\" if missing_struct_fields: raise DataFrameMissingStructFieldError(error_message)","title":"validate_schema()"},{"location":"reference/quinn/#quinn.week_end_date","text":"Return a date column for the end of week for a given day. The Spark function dayofweek considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. Usage of the when and otherwise functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. Parameters: Name Type Description Default col Column The reference date column. required week_end_day str The week end day (default: 'Sat') 'Sat' Returns: Type Description Column A Column of end of the week dates. Source code in quinn/functions.py def week_end_date(col: Column, week_end_day: str = \"Sat\") -> Column: \"\"\"Return a date column for the end of week for a given day. The Spark function `dayofweek` considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. 
Usage of the `when` and `otherwise` functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. :param col: The reference date column. :type col: Column :param week_end_day: The week end day (default: 'Sat') :type week_end_day: str :return: A Column of end of the week dates. :rtype: Column \"\"\" _raise_if_invalid_day(week_end_day) # these are the default Spark mappings. Spark considers Sunday the first day of the week. day_of_week_mapping = { \"Sun\": 1, \"Mon\": 2, \"Tue\": 3, \"Wed\": 4, \"Thu\": 5, \"Fri\": 6, \"Sat\": 7, } return F.when( F.dayofweek(col).eqNullSafe(F.lit(day_of_week_mapping[week_end_day])), col, ).otherwise(F.next_day(col, week_end_day))","title":"week_end_date()"},{"location":"reference/quinn/#quinn.week_start_date","text":"Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The week_start_day argument is a string corresponding to the day of the week to start the week from, e.g. \"Mon\" , \"Tue\" , and must be in the set: {\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"} . If the argument given is not a valid day then a ValueError will be raised. Parameters: Name Type Description Default col Column The column to determine start of week dates on required week_start_day str The day to start the week on 'Sun' Returns: Type Description Column A Column with start of week dates Source code in quinn/functions.py def week_start_date(col: Column, week_start_day: str = \"Sun\") -> Column: \"\"\"Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates. 
The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The `week_start_day` argument is a string corresponding to the day of the week to start the week from, e.g. `\"Mon\"`, `\"Tue\"`, and must be in the set: `{\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"}`. If the argument given is not a valid day then a `ValueError` will be raised. :param col: The column to determine start of week dates on :type col: Column :param week_start_day: The day to start the week on :type week_start_day: str :returns: A Column with start of week dates :rtype: Column \"\"\" _raise_if_invalid_day(week_start_day) # the \"standard week\" in Spark is from Sunday to Saturday mapping = { \"Sun\": \"Sat\", \"Mon\": \"Sun\", \"Tue\": \"Mon\", \"Wed\": \"Tue\", \"Thu\": \"Wed\", \"Fri\": \"Thu\", \"Sat\": \"Fri\", } end = week_end_date(col, mapping[week_start_day]) return F.date_add(end, -6)","title":"week_start_date()"},{"location":"reference/quinn/#quinn.with_columns_renamed","text":"Ffunction designed to rename the columns of a Spark DataFrame . It takes a Callable[[str], str] object as an argument ( fun ) and returns a Callable[[DataFrame], DataFrame] object. When _() is called on a DataFrame , it creates a list of column names, applying the argument fun() to each of them, and returning a new DataFrame with the new column names. Parameters: Name Type Description Default fun Callable [[ str ], str ] Renaming function required Returns: Type Description Callable [[ DataFrame ], DataFrame ] Function which takes DataFrame as parameter. Source code in quinn/transformations.py def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]: \"\"\"Ffunction designed to rename the columns of a `Spark DataFrame`. It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a `Callable[[DataFrame], DataFrame]` object. 
When `_()` is called on a `DataFrame`, it creates a list of column names, applying the argument `fun()` to each of them, and returning a new `DataFrame` with the new column names. :param fun: Renaming function :returns: Function which takes DataFrame as parameter. \"\"\" def _(df: DataFrame) -> DataFrame: cols = [F.col(f\"`{col_name}`\").alias(fun(col_name)) for col_name in df.columns] return df.select(*cols) return _","title":"with_columns_renamed()"},{"location":"reference/quinn/#quinn.with_some_columns_renamed","text":"Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame] . Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed. Parameters: Name Type Description Default fun Callable [[ str ], str ] A function that takes a column name as a string and returns a new name as a string. required change_col_name Callable [[ str ], str ] A function that takes a column name as a string and returns a boolean. required Returns: Type Description `Callable[[DataFrame], DataFrame]` A Callable[[DataFrame], DataFrame] , which takes a DataFrame and returns a DataFrame with some of its columns renamed. Source code in quinn/transformations.py def with_some_columns_renamed( fun: Callable[[str], str], change_col_name: Callable[[str], str], ) -> Callable[[DataFrame], DataFrame]: \"\"\"Function that takes a `Callable[[str], str]` and a `Callable[[str], str]` and returns a `Callable[[DataFrame], DataFrame]`. Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :param fun: A function that takes a column name as a string and returns a new name as a string. :type fun: `Callable[[str], str]` :param change_col_name: A function that takes a column name as a string and returns a boolean. 
:type change_col_name: `Callable[[str], str]` :return: A `Callable[[DataFrame], DataFrame]`, which takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :rtype: `Callable[[DataFrame], DataFrame]` \"\"\" def _(df: DataFrame) -> DataFrame: cols = [ F.col(f\"`{col_name}`\").alias(fun(col_name)) if change_col_name(col_name) else F.col(f\"`{col_name}`\") for col_name in df.columns ] return df.select(*cols) return _","title":"with_some_columns_renamed()"},{"location":"reference/quinn/append_if_schema_identical/","text":"SchemaMismatchError Bases: ValueError raise this when there's a schema mismatch between source & target schema. Source code in quinn/append_if_schema_identical.py class SchemaMismatchError(ValueError): \"\"\"raise this when there's a schema mismatch between source & target schema.\"\"\" append_if_schema_identical(source_df, target_df) Compare the schema of source & target dataframe. Parameters: Name Type Description Default source_df DataFrame Input DataFrame required target_df DataFrame Input DataFrame required Returns: Type Description pyspark.sql.DataFrame dataframe Source code in quinn/append_if_schema_identical.py def append_if_schema_identical(source_df: DataFrame, target_df: DataFrame) -> DataFrame: \"\"\"Compare the schema of source & target dataframe. 
:param source_df: Input DataFrame :type source_df: pyspark.sql.DataFrame :param target_df: Input DataFrame :type target_df: pyspark.sql.DataFrame :return: dataframe :rtype: pyspark.sql.DataFrame \"\"\" # Retrieve the schemas of the source and target dataframes source_schema = source_df.schema target_schema = target_df.schema # Convert the schemas to a list of tuples source_schema_list = [(field.name, str(field.dataType)) for field in source_schema] target_schema_list = [(field.name, str(field.dataType)) for field in target_schema] unmatched_cols = [ col for col in source_schema_list if col not in target_schema_list ] error_message = ( f\"The schemas of the source and target dataframes are not identical.\" f\"From source schema column {unmatched_cols} is missing in target schema\" ) # Check if the column names in the source and target schemas are the same, regardless of their order if set(source_schema.fieldNames()) != set(target_schema.fieldNames()): raise SchemaMismatchError(error_message) # Check if the column names and data types in the source and target schemas are the same, in the same order if sorted(source_schema_list) != sorted(target_schema_list): raise SchemaMismatchError(error_message) # Append the dataframes if the schemas are identical return target_df.unionByName(source_df)","title":"Append if schema identical"},{"location":"reference/quinn/append_if_schema_identical/#quinn.append_if_schema_identical.SchemaMismatchError","text":"Bases: ValueError raise this when there's a schema mismatch between source & target schema. Source code in quinn/append_if_schema_identical.py class SchemaMismatchError(ValueError): \"\"\"raise this when there's a schema mismatch between source & target schema.\"\"\"","title":"SchemaMismatchError"},{"location":"reference/quinn/append_if_schema_identical/#quinn.append_if_schema_identical.append_if_schema_identical","text":"Compare the schema of source & target dataframe. 
Parameters: Name Type Description Default source_df DataFrame Input DataFrame required target_df DataFrame Input DataFrame required Returns: Type Description pyspark.sql.DataFrame dataframe Source code in quinn/append_if_schema_identical.py def append_if_schema_identical(source_df: DataFrame, target_df: DataFrame) -> DataFrame: \"\"\"Compare the schema of source & target dataframe. :param source_df: Input DataFrame :type source_df: pyspark.sql.DataFrame :param target_df: Input DataFrame :type target_df: pyspark.sql.DataFrame :return: dataframe :rtype: pyspark.sql.DataFrame \"\"\" # Retrieve the schemas of the source and target dataframes source_schema = source_df.schema target_schema = target_df.schema # Convert the schemas to a list of tuples source_schema_list = [(field.name, str(field.dataType)) for field in source_schema] target_schema_list = [(field.name, str(field.dataType)) for field in target_schema] unmatched_cols = [ col for col in source_schema_list if col not in target_schema_list ] error_message = ( f\"The schemas of the source and target dataframes are not identical.\" f\"From source schema column {unmatched_cols} is missing in target schema\" ) # Check if the column names in the source and target schemas are the same, regardless of their order if set(source_schema.fieldNames()) != set(target_schema.fieldNames()): raise SchemaMismatchError(error_message) # Check if the column names and data types in the source and target schemas are the same, in the same order if sorted(source_schema_list) != sorted(target_schema_list): raise SchemaMismatchError(error_message) # Append the dataframes if the schemas are identical return target_df.unionByName(source_df)","title":"append_if_schema_identical()"},{"location":"reference/quinn/dataframe_helpers/","text":"column_to_list(df, col_name) Collect column to list of values. 
Parameters: Name Type Description Default df DataFrame Input DataFrame required col_name str Column to collect required Returns: Type Description List[Any] List of values Source code in quinn/dataframe_helpers.py def column_to_list(df: DataFrame, col_name: str) -> list[Any]: \"\"\"Collect column to list of values. :param df: Input DataFrame :type df: pyspark.sql.DataFrame :param col_name: Column to collect :type col_name: str :return: List of values :rtype: List[Any] \"\"\" return [x[col_name] for x in df.select(col_name).collect()] create_df(spark, rows_data, col_specs) Create a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default spark SparkSession SparkSession object required rows_data array-like the data used to create the DataFrame required col_specs list of tuples list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/dataframe_helpers.py def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001 \"\"\"Create a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. :param spark: SparkSession object :type spark: SparkSession :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417 return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields)) print_athena_create_table(df, athena_table_name, s3location) Generate the Athena create table statement for a given DataFrame. 
Parameters: Name Type Description Default df DataFrame The pyspark.sql.DataFrame to use required athena_table_name str The name of the athena table to generate required s3location str The S3 location of the parquet data required Returns: Type Description None None Source code in quinn/dataframe_helpers.py def print_athena_create_table( df: DataFrame, athena_table_name: str, s3location: str, ) -> None: \"\"\"Generate the Athena create table statement for a given DataFrame. :param df: The pyspark.sql.DataFrame to use :param athena_table_name: The name of the athena table to generate :param s3location: The S3 location of the parquet data :return: None \"\"\" fields = df.schema print(f\"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( \") for field in fields.fieldNames()[:-1]: print(\"\\t\", f\"`{fields[field].name}` {fields[field].dataType.simpleString()}, \") last = fields[fields.fieldNames()[-1]] print(\"\\t\", f\"`{last.name}` {last.dataType.simpleString()} \") print(\")\") print(\"STORED AS PARQUET\") print(f\"LOCATION '{s3location}'\\n\") show_output_to_df(show_output, spark) Show output as spark DataFrame. Parameters: Name Type Description Default show_output str String representing output of 'show' command in spark required spark SparkSession SparkSession object required Returns: Type Description Dataframe DataFrame object containing output of a show command in spark Source code in quinn/dataframe_helpers.py def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame: \"\"\"Show output as spark DataFrame. 
:param show_output: String representing output of 'show' command in spark :type show_output: str :param spark: SparkSession object :type spark: SparkSession :return: DataFrame object containing output of a show command in spark :rtype: Dataframe \"\"\" lines = show_output.split(\"\\n\") ugly_column_names = lines[1] pretty_column_names = [i.strip() for i in ugly_column_names[1:-1].split(\"|\")] pretty_data = [] ugly_data = lines[3:-1] for row in ugly_data: r = [i.strip() for i in row[1:-1].split(\"|\")] pretty_data.append(tuple(r)) return spark.createDataFrame(pretty_data, pretty_column_names) to_list_of_dictionaries(df) Convert a Spark DataFrame to a list of dictionaries. Parameters: Name Type Description Default df DataFrame The Spark DataFrame to convert. required Returns: Type Description List[Dict[str, Any]] A list of dictionaries representing the rows in the DataFrame. Source code in quinn/dataframe_helpers.py def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]: \"\"\"Convert a Spark DataFrame to a list of dictionaries. :param df: The Spark DataFrame to convert. :type df: :py:class:`pyspark.sql.DataFrame` :return: A list of dictionaries representing the rows in the DataFrame. :rtype: List[Dict[str, Any]] \"\"\" return list(map(lambda r: r.asDict(), df.collect())) # noqa: C417 two_columns_to_dictionary(df, key_col_name, value_col_name) Collect two columns as dictionary when first column is key and second is value. Parameters: Name Type Description Default df DataFrame Input DataFrame required key_col_name str Key-column required value_col_name str Value-column required Returns: Type Description Dict[str, Any] Dictionary with values Source code in quinn/dataframe_helpers.py def two_columns_to_dictionary( df: DataFrame, key_col_name: str, value_col_name: str, ) -> dict[str, Any]: \"\"\"Collect two columns as dictionary when first column is key and second is value. 
:param df: Input DataFrame :type df: pyspark.sql.DataFrame :param key_col_name: Key-column :type key_col_name: str :param value_col_name: Value-column :type value_col_name: str :return: Dictionary with values :rtype: Dict[str, Any] \"\"\" k, v = key_col_name, value_col_name return {x[k]: x[v] for x in df.select(k, v).collect()}","title":"Dataframe helpers"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.column_to_list","text":"Collect column to list of values. Parameters: Name Type Description Default df DataFrame Input DataFrame required col_name str Column to collect required Returns: Type Description List[Any] List of values Source code in quinn/dataframe_helpers.py def column_to_list(df: DataFrame, col_name: str) -> list[Any]: \"\"\"Collect column to list of values. :param df: Input DataFrame :type df: pyspark.sql.DataFrame :param col_name: Column to collect :type col_name: str :return: List of values :rtype: List[Any] \"\"\" return [x[col_name] for x in df.select(col_name).collect()]","title":"column_to_list()"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.create_df","text":"Create a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default spark SparkSession SparkSession object required rows_data array-like the data used to create the DataFrame required col_specs list of tuples list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/dataframe_helpers.py def create_df(spark: SparkSession, rows_data, col_specs) -> DataFrame: # noqa: ANN001 \"\"\"Create a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. 
:param spark: SparkSession object :type spark: SparkSession :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" struct_fields = list(map(lambda x: StructField(*x), col_specs)) # noqa: C417 return spark.createDataFrame(data=rows_data, schema=StructType(struct_fields))","title":"create_df()"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.print_athena_create_table","text":"Generate the Athena create table statement for a given DataFrame. Parameters: Name Type Description Default df DataFrame The pyspark.sql.DataFrame to use required athena_table_name str The name of the athena table to generate required s3location str The S3 location of the parquet data required Returns: Type Description None None Source code in quinn/dataframe_helpers.py def print_athena_create_table( df: DataFrame, athena_table_name: str, s3location: str, ) -> None: \"\"\"Generate the Athena create table statement for a given DataFrame. :param df: The pyspark.sql.DataFrame to use :param athena_table_name: The name of the athena table to generate :param s3location: The S3 location of the parquet data :return: None \"\"\" fields = df.schema print(f\"CREATE EXTERNAL TABLE IF NOT EXISTS `{athena_table_name}` ( \") for field in fields.fieldNames()[:-1]: print(\"\\t\", f\"`{fields[field].name}` {fields[field].dataType.simpleString()}, \") last = fields[fields.fieldNames()[-1]] print(\"\\t\", f\"`{last.name}` {last.dataType.simpleString()} \") print(\")\") print(\"STORED AS PARQUET\") print(f\"LOCATION '{s3location}'\\n\")","title":"print_athena_create_table()"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.show_output_to_df","text":"Show output as spark DataFrame. 
Parameters: Name Type Description Default show_output str String representing output of 'show' command in spark required spark SparkSession SparkSession object required Returns: Type Description Dataframe DataFrame object containing output of a show command in spark Source code in quinn/dataframe_helpers.py def show_output_to_df(show_output: str, spark: SparkSession) -> DataFrame: \"\"\"Show output as spark DataFrame. :param show_output: String representing output of 'show' command in spark :type show_output: str :param spark: SparkSession object :type spark: SparkSession :return: DataFrame object containing output of a show command in spark :rtype: Dataframe \"\"\" lines = show_output.split(\"\\n\") ugly_column_names = lines[1] pretty_column_names = [i.strip() for i in ugly_column_names[1:-1].split(\"|\")] pretty_data = [] ugly_data = lines[3:-1] for row in ugly_data: r = [i.strip() for i in row[1:-1].split(\"|\")] pretty_data.append(tuple(r)) return spark.createDataFrame(pretty_data, pretty_column_names)","title":"show_output_to_df()"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.to_list_of_dictionaries","text":"Convert a Spark DataFrame to a list of dictionaries. Parameters: Name Type Description Default df DataFrame The Spark DataFrame to convert. required Returns: Type Description List[Dict[str, Any]] A list of dictionaries representing the rows in the DataFrame. Source code in quinn/dataframe_helpers.py def to_list_of_dictionaries(df: DataFrame) -> list[dict[str, Any]]: \"\"\"Convert a Spark DataFrame to a list of dictionaries. :param df: The Spark DataFrame to convert. :type df: :py:class:`pyspark.sql.DataFrame` :return: A list of dictionaries representing the rows in the DataFrame. 
:rtype: List[Dict[str, Any]] \"\"\" return list(map(lambda r: r.asDict(), df.collect())) # noqa: C417","title":"to_list_of_dictionaries()"},{"location":"reference/quinn/dataframe_helpers/#quinn.dataframe_helpers.two_columns_to_dictionary","text":"Collect two columns as dictionary when first column is key and second is value. Parameters: Name Type Description Default df DataFrame Input DataFrame required key_col_name str Key-column required value_col_name str Value-column required Returns: Type Description Dict[str, Any] Dictionary with values Source code in quinn/dataframe_helpers.py def two_columns_to_dictionary( df: DataFrame, key_col_name: str, value_col_name: str, ) -> dict[str, Any]: \"\"\"Collect two columns as dictionary when first column is key and second is value. :param df: Input DataFrame :type df: pyspark.sql.DataFrame :param key_col_name: Key-column :type key_col_name: str :param value_col_name: Value-column :type value_col_name: str :return: Dictionary with values :rtype: Dict[str, Any] \"\"\" k, v = key_col_name, value_col_name return {x[k]: x[v] for x in df.select(k, v).collect()}","title":"two_columns_to_dictionary()"},{"location":"reference/quinn/dataframe_validator/","text":"DataFrameMissingColumnError Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingColumnError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\" DataFrameMissingStructFieldError Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingStructFieldError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\" DataFrameProhibitedColumnError Bases: ValueError Raise this when a DataFrame includes prohibited columns. 
Source code in quinn/dataframe_validator.py class DataFrameProhibitedColumnError(ValueError): \"\"\"Raise this when a DataFrame includes prohibited columns.\"\"\" validate_absence_of_columns(df, prohibited_col_names) Validate that none of the prohibited column names are present among specified DataFrame columns. Parameters: Name Type Description Default df DataFrame DataFrame containing columns to be checked. required prohibited_col_names list [ str ] List of prohibited column names. required Raises: Type Description DataFrameProhibitedColumnError If the prohibited column names are present among the specified DataFrame columns. Source code in quinn/dataframe_validator.py def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None: \"\"\"Validate that none of the prohibited column names are present among specified DataFrame columns. :param df: DataFrame containing columns to be checked. :param prohibited_col_names: List of prohibited column names. :raises DataFrameProhibitedColumnError: If the prohibited column names are present among the specified DataFrame columns. \"\"\" all_col_names = df.columns extra_col_names = [x for x in all_col_names if x in prohibited_col_names] error_message = f\"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}\" if extra_col_names: raise DataFrameProhibitedColumnError(error_message) validate_presence_of_columns(df, required_col_names) Validate the presence of column names in a DataFrame. Parameters: Name Type Description Default df DataFrame A spark DataFrame. required required_col_names list [ str ] List of the required column names for the DataFrame. required Returns: Type Description None None. Raises: Type Description DataFrameMissingColumnError if any of the requested column names are not present in the DataFrame. 
Source code in quinn/dataframe_validator.py def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None: \"\"\"Validate the presence of column names in a DataFrame. :param df: A spark DataFrame. :type df: DataFrame :param required_col_names: List of the required column names for the DataFrame. :type required_col_names: :py:class:`list` of :py:class:`str` :return: None. :raises DataFrameMissingColumnError: if any of the requested column names are not present in the DataFrame. \"\"\" all_col_names = df.columns missing_col_names = [x for x in required_col_names if x not in all_col_names] error_message = f\"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}\" if missing_col_names: raise DataFrameMissingColumnError(error_message) validate_schema(df, required_schema, ignore_nullable=False) Function that validates if a given DataFrame has a given StructType as its schema. Parameters: Name Type Description Default df DataFrame DataFrame to validate required required_schema StructType StructType required for the DataFrame required ignore_nullable bool (Optional) A flag for if nullable fields should be ignored during validation False Raises: Type Description DataFrameMissingStructFieldError if any StructFields from the required schema are not included in the DataFrame schema Source code in quinn/dataframe_validator.py def validate_schema( df: DataFrame, required_schema: StructType, ignore_nullable: bool = False, ) -> None: \"\"\"Function that validates if a given DataFrame has a given StructType as its schema. 
:param df: DataFrame to validate :type df: DataFrame :param required_schema: StructType required for the DataFrame :type required_schema: StructType :param ignore_nullable: (Optional) A flag for if nullable fields should be ignored during validation :type ignore_nullable: bool, optional :raises DataFrameMissingStructFieldError: if any StructFields from the required schema are not included in the DataFrame schema \"\"\" _all_struct_fields = copy.deepcopy(df.schema) _required_schema = copy.deepcopy(required_schema) if ignore_nullable: for x in _all_struct_fields: x.nullable = None for x in _required_schema: x.nullable = None missing_struct_fields = [x for x in _required_schema if x not in _all_struct_fields] error_message = f\"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {_all_struct_fields}\" if missing_struct_fields: raise DataFrameMissingStructFieldError(error_message)","title":"Dataframe validator"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.DataFrameMissingColumnError","text":"Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingColumnError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\"","title":"DataFrameMissingColumnError"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.DataFrameMissingStructFieldError","text":"Bases: ValueError Raise this when there's a DataFrame column error. Source code in quinn/dataframe_validator.py class DataFrameMissingStructFieldError(ValueError): \"\"\"Raise this when there's a DataFrame column error.\"\"\"","title":"DataFrameMissingStructFieldError"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.DataFrameProhibitedColumnError","text":"Bases: ValueError Raise this when a DataFrame includes prohibited columns. 
Source code in quinn/dataframe_validator.py class DataFrameProhibitedColumnError(ValueError): \"\"\"Raise this when a DataFrame includes prohibited columns.\"\"\"","title":"DataFrameProhibitedColumnError"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.validate_absence_of_columns","text":"Validate that none of the prohibited column names are present among specified DataFrame columns. Parameters: Name Type Description Default df DataFrame DataFrame containing columns to be checked. required prohibited_col_names list [ str ] List of prohibited column names. required Raises: Type Description DataFrameProhibitedColumnError If the prohibited column names are present among the specified DataFrame columns. Source code in quinn/dataframe_validator.py def validate_absence_of_columns(df: DataFrame, prohibited_col_names: list[str]) -> None: \"\"\"Validate that none of the prohibited column names are present among specified DataFrame columns. :param df: DataFrame containing columns to be checked. :param prohibited_col_names: List of prohibited column names. :raises DataFrameProhibitedColumnError: If the prohibited column names are present among the specified DataFrame columns. \"\"\" all_col_names = df.columns extra_col_names = [x for x in all_col_names if x in prohibited_col_names] error_message = f\"The {extra_col_names} columns are not allowed to be included in the DataFrame with the following columns {all_col_names}\" if extra_col_names: raise DataFrameProhibitedColumnError(error_message)","title":"validate_absence_of_columns()"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.validate_presence_of_columns","text":"Validate the presence of column names in a DataFrame. Parameters: Name Type Description Default df DataFrame A spark DataFrame. required required_col_names list [ str ] List of the required column names for the DataFrame. required Returns: Type Description None None. 
Raises: Type Description DataFrameMissingColumnError if any of the requested column names are not present in the DataFrame. Source code in quinn/dataframe_validator.py def validate_presence_of_columns(df: DataFrame, required_col_names: list[str]) -> None: \"\"\"Validate the presence of column names in a DataFrame. :param df: A spark DataFrame. :type df: DataFrame :param required_col_names: List of the required column names for the DataFrame. :type required_col_names: :py:class:`list` of :py:class:`str` :return: None. :raises DataFrameMissingColumnError: if any of the requested column names are not present in the DataFrame. \"\"\" all_col_names = df.columns missing_col_names = [x for x in required_col_names if x not in all_col_names] error_message = f\"The {missing_col_names} columns are not included in the DataFrame with the following columns {all_col_names}\" if missing_col_names: raise DataFrameMissingColumnError(error_message)","title":"validate_presence_of_columns()"},{"location":"reference/quinn/dataframe_validator/#quinn.dataframe_validator.validate_schema","text":"Function that validates if a given DataFrame has a given StructType as its schema. Parameters: Name Type Description Default df DataFrame DataFrame to validate required required_schema StructType StructType required for the DataFrame required ignore_nullable bool (Optional) A flag for if nullable fields should be ignored during validation False Raises: Type Description DataFrameMissingStructFieldError if any StructFields from the required schema are not included in the DataFrame schema Source code in quinn/dataframe_validator.py def validate_schema( df: DataFrame, required_schema: StructType, ignore_nullable: bool = False, ) -> None: \"\"\"Function that validates if a given DataFrame has a given StructType as its schema. 
:param df: DataFrame to validate :type df: DataFrame :param required_schema: StructType required for the DataFrame :type required_schema: StructType :param ignore_nullable: (Optional) A flag for if nullable fields should be ignored during validation :type ignore_nullable: bool, optional :raises DataFrameMissingStructFieldError: if any StructFields from the required schema are not included in the DataFrame schema \"\"\" _all_struct_fields = copy.deepcopy(df.schema) _required_schema = copy.deepcopy(required_schema) if ignore_nullable: for x in _all_struct_fields: x.nullable = None for x in _required_schema: x.nullable = None missing_struct_fields = [x for x in _required_schema if x not in _all_struct_fields] error_message = f\"The {missing_struct_fields} StructFields are not included in the DataFrame with the following StructFields {_all_struct_fields}\" if missing_struct_fields: raise DataFrameMissingStructFieldError(error_message)","title":"validate_schema()"},{"location":"reference/quinn/functions/","text":"anti_trim(col) Remove inner whitespace (runs of whitespace between word characters) from col using the regexp_replace function; leading and trailing whitespace is kept. Parameters: Name Type Description Default col Column Column on which to perform the regexp_replace. required Returns: Type Description Column A new Column with the whitespace between word characters removed. Source code in quinn/functions.py def anti_trim(col: Column) -> Column: \"\"\"Remove inner whitespace (runs of whitespace between word characters) from ``col`` using the regexp_replace function; leading and trailing whitespace is kept. :param col: Column on which to perform the regexp_replace. :type col: Column :return: A new Column with the whitespace between word characters removed. :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\b\\\\s+\\\\b\", \"\") approx_equal(col1, col2, threshold) Compare two Column objects by checking if the difference between them is less than a specified threshold . 
Parameters: Name Type Description Default col1 Column the first Column required col2 Column the second Column required threshold Number value to compare with required Returns: Type Description Column Boolean Column with True indicating that abs(col1 - col2) is less than threshold Source code in quinn/functions.py def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column: \"\"\"Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``. :param col1: the first ``Column`` :type col1: Column :param col2: the second ``Column`` :type col2: Column :param threshold: value to compare with :type threshold: Number :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 - col2)`` is less than ``threshold`` \"\"\" return F.abs(col1 - col2) < threshold array_choice(col, seed=None) Returns one random element from the given column. Parameters: Name Type Description Default col Column Column from which element is chosen required Returns: Type Description Column random element from the given column Source code in quinn/functions.py def array_choice(col: Column, seed: int | None = None) -> Column: \"\"\"Returns one random element from the given column. :param col: Column from which element is chosen :type col: Column :return: random element from the given column :rtype: Column \"\"\" index = (F.rand(seed) * F.size(col)).cast(\"int\") return col[index] business_days_between(start_date, end_date) Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date. 
Parameters: Name Type Description Default start_date Column The column with the start dates required end_date Column The column with the end dates required Returns: Type Description Column a Column with the number of business days between the start and the end date Source code in quinn/functions.py def business_days_between(start_date: Column, end_date: Column) -> Column: # noqa: ARG001 \"\"\"Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date. :param start_date: The column with the start dates :type start_date: Column :param end_date: The column with the end dates :type end_date: Column :returns: a Column with the number of business days between the start and the end date :rtype: Column \"\"\" all_days = \"sequence(start_date, end_date)\" days_of_week = f\"transform({all_days}, x -> date_format(x, 'E'))\" filter_weekends = F.expr(f\"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))\") num_business_days = F.size(filter_weekends) - 1 return F.when(num_business_days < 0, None).otherwise(num_business_days) exists(f) Create a user-defined function. It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] Callable function - A callable function that takes an element of type Any and returns a boolean value. required Returns: Type Description UserDefinedFunction A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Source code in quinn/functions.py def exists(f: Callable[[Any], bool]) -> udf: \"\"\"Create a user-defined function. 
It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :param f: Callable function - A callable function that takes an element of type Any and returns a boolean value. :return: A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return any(map(f, list_)) return F.udf(temp_udf, BooleanType()) forall(f) The forall function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] A callable function f which takes in any type and returns a boolean required Returns: Type Description UserDefinedFunction A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. Source code in quinn/functions.py def forall(f: Callable[[Any], bool]) -> udf: \"\"\"The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. 
:param f: A callable function ``f`` which takes in any type and returns a boolean :return: A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return all(map(f, list_)) return F.udf(temp_udf, BooleanType()) multi_equals(value) Create a user-defined function that checks if all the given columns have the designated value. Parameters: Name Type Description Default value Any The designated value. required Returns: Type Description UserDifinedFunction A user-defined function of type BooleanType(). Source code in quinn/functions.py def multi_equals(value: Any) -> udf: # noqa: ANN401 \"\"\"Create a user-defined function that checks if all the given columns have the designated value. :param value: The designated value. :type value: Any :return: A user-defined function of type BooleanType(). :rtype: UserDifinedFunction \"\"\" def temp_udf(*cols) -> bool: # noqa: ANN002 return all(map(lambda col: col == value, cols)) # noqa: C417 return F.udf(temp_udf, BooleanType()) regexp_extract_all(s, regexp) Function uses the Python re library to extract regular expressions from a string ( s ) using a regex pattern ( regexp ). It returns a list of all matches, or None if s is None . Parameters: Name Type Description Default s Column input string ( Column ) required regexp Column string re pattern required Source code in quinn/functions.py @F.udf(returnType=ArrayType(StringType())) def regexp_extract_all(s: Column, regexp: Column) -> Column: \"\"\"Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`). It returns a list of all matches, or `None` if `s` is `None`. 
:param s: input string (`Column`) :type s: str :param regexp: string `re` pattern :rtype: Column \"\"\" return None if s is None else re.findall(regexp, s) remove_all_whitespace(col) Function takes a Column object as a parameter and returns a Column object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. Parameters: Name Type Description Default col Column a Column object required Returns: Type Description Column a Column object with all white space removed Source code in quinn/functions.py def remove_all_whitespace(col: Column) -> Column: \"\"\"Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. :param col: a `Column` object :type col: Column :returns: a `Column` object with all white space removed :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\s+\", \"\") remove_non_word_characters(col) Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression \"[^\\\\w\\\\s]+\" . This expression represents any character that is not a word character (e.g. \\\\w ) or whitespace ( \\\\s ). Parameters: Name Type Description Default col Column A Column object. required Returns: Type Description Column A Column object with non-word characters removed. Source code in quinn/functions.py def remove_non_word_characters(col: Column) -> Column: r\"\"\"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression ``\"[^\\\\w\\\\s]+\"``. This expression represents any character that is not a word character (e.g. `\\\\w`) or whitespace (`\\\\s`). :param col: A Column object. :return: A Column object with non-word characters removed. 
\"\"\" return F.regexp_replace(col, \"[^\\\\w\\\\s]+\", \"\") single_space(col) Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. Parameters: Name Type Description Default col Column The column which needs to be spaced required Returns: Type Description Column A trimmed column with single space Source code in quinn/functions.py def single_space(col: Column) -> Column: \"\"\"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. :param col: The column which needs to be spaced :type col: Column :returns: A trimmed column with single space :rtype: Column \"\"\" return F.trim(F.regexp_replace(col, \" +\", \" \")) uuid5(col, namespace=uuid.NAMESPACE_DNS, extra_string='') Function generates UUIDv5 from col and namespace , optionally prepending an extra string to col . Sets variant to RFC 4122 one. Parameters: Name Type Description Default col Column Column that will be hashed. required namespace uuid . UUID Namespace to be used. (default: uuid.NAMESPACE_DNS ) uuid.NAMESPACE_DNS extra_string str In case of collisions one can pass an extra string to hash on. '' Returns: Type Description Column String representation of generated UUIDv5 Source code in quinn/functions.py def uuid5( col: Column, namespace: uuid.UUID = uuid.NAMESPACE_DNS, extra_string: str = \"\", ) -> Column: \"\"\"Function generates UUIDv5 from ``col`` and ``namespace``, optionally prepending an extra string to ``col``. Sets variant to RFC 4122 one. :param col: Column that will be hashed. :type col: Column :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`) :type namespace: str :param extra_string: In case of collisions one can pass an extra string to hash on. 
:type extra_string: str :return: String representation of generated UUIDv5 :rtype: Column \"\"\" ns = F.lit(namespace.bytes) salted_col = F.concat(F.lit(extra_string), col) encoded = F.encode(salted_col, \"utf-8\") encoded_with_ns = F.concat(ns, encoded) hashed = F.sha1(encoded_with_ns) variant_part = F.substring(hashed, 17, 4) variant_part = F.conv(variant_part, 16, 2) variant_part = F.lpad(variant_part, 16, \"0\") variant_part = F.concat( F.lit(\"10\"), F.substring(variant_part, 3, 16), ) # RFC 4122 variant. variant_part = F.lower(F.conv(variant_part, 2, 16)) return F.concat_ws( \"-\", F.substring(hashed, 1, 8), F.substring(hashed, 9, 4), F.concat(F.lit(\"5\"), F.substring(hashed, 14, 3)), # Set version. variant_part, F.substring(hashed, 21, 12), ) week_end_date(col, week_end_day='Sat') Return a date column for the end of week for a given day. The Spark function dayofweek considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. Usage of the when and otherwise functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. Parameters: Name Type Description Default col Column The reference date column. required week_end_day str The week end day (default: 'Sat') 'Sat' Returns: Type Description Column A Column of end of the week dates. Source code in quinn/functions.py def week_end_date(col: Column, week_end_day: str = \"Sat\") -> Column: \"\"\"Return a date column for the end of week for a given day. The Spark function `dayofweek` considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. Usage of the `when` and `otherwise` functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. 
:param col: The reference date column. :type col: Column :param week_end_day: The week end day (default: 'Sat') :type week_end_day: str :return: A Column of end of the week dates. :rtype: Column \"\"\" _raise_if_invalid_day(week_end_day) # these are the default Spark mappings. Spark considers Sunday the first day of the week. day_of_week_mapping = { \"Sun\": 1, \"Mon\": 2, \"Tue\": 3, \"Wed\": 4, \"Thu\": 5, \"Fri\": 6, \"Sat\": 7, } return F.when( F.dayofweek(col).eqNullSafe(F.lit(day_of_week_mapping[week_end_day])), col, ).otherwise(F.next_day(col, week_end_day)) week_start_date(col, week_start_day='Sun') Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The week_start_day argument is a string corresponding to the day of the week to start the week from, e.g. \"Mon\" , \"Tue\" , and must be in the set: {\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"} . If the argument given is not a valid day then a ValueError will be raised. Parameters: Name Type Description Default col Column The column to determine start of week dates on required week_start_day str The day to start the week on 'Sun' Returns: Type Description Column A Column with start of week dates Source code in quinn/functions.py def week_start_date(col: Column, week_start_day: str = \"Sun\") -> Column: \"\"\"Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The `week_start_day` argument is a string corresponding to the day of the week to start the week from, e.g. 
`\"Mon\"`, `\"Tue\"`, and must be in the set: `{\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"}`. If the argument given is not a valid day then a `ValueError` will be raised. :param col: The column to determine start of week dates on :type col: Column :param week_start_day: The day to start the week on :type week_start_day: str :returns: A Column with start of week dates :rtype: Column \"\"\" _raise_if_invalid_day(week_start_day) # the \"standard week\" in Spark is from Sunday to Saturday mapping = { \"Sun\": \"Sat\", \"Mon\": \"Sun\", \"Tue\": \"Mon\", \"Wed\": \"Tue\", \"Thu\": \"Wed\", \"Fri\": \"Thu\", \"Sat\": \"Fri\", } end = week_end_date(col, mapping[week_start_day]) return F.date_add(end, -6)","title":"Functions"},{"location":"reference/quinn/functions/#quinn.functions.anti_trim","text":"Remove whitespace from the boundaries of col using the regexp_replace function. Parameters: Name Type Description Default col Column Column on which to perform the regexp_replace. required Returns: Type Description Column A new Column with all whitespace removed from the boundaries. Source code in quinn/functions.py def anti_trim(col: Column) -> Column: \"\"\"Remove whitespace from the boundaries of ``col`` using the regexp_replace function. :param col: Column on which to perform the regexp_replace. :type col: Column :return: A new Column with all whitespace removed from the boundaries. :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\b\\\\s+\\\\b\", \"\")","title":"anti_trim()"},{"location":"reference/quinn/functions/#quinn.functions.approx_equal","text":"Compare two Column objects by checking if the difference between them is less than a specified threshold . 
Parameters: Name Type Description Default col1 Column the first Column required col2 Column the second Column required threshold Number value to compare with required Returns: Type Description Column Boolean Column with True indicating that abs(col1 - col2) is less than threshold Source code in quinn/functions.py def approx_equal(col1: Column, col2: Column, threshold: Number) -> Column: \"\"\"Compare two ``Column`` objects by checking if the difference between them is less than a specified ``threshold``. :param col1: the first ``Column`` :type col1: Column :param col2: the second ``Column`` :type col2: Column :param threshold: value to compare with :type threshold: Number :return: Boolean ``Column`` with ``True`` indicating that ``abs(col1 - col2)`` is less than ``threshold`` \"\"\" return F.abs(col1 - col2) < threshold","title":"approx_equal()"},{"location":"reference/quinn/functions/#quinn.functions.array_choice","text":"Returns one random element from the given column. Parameters: Name Type Description Default col Column Column from which element is chosen required Returns: Type Description Column random element from the given column Source code in quinn/functions.py def array_choice(col: Column, seed: int | None = None) -> Column: \"\"\"Returns one random element from the given column. :param col: Column from which element is chosen :type col: Column :return: random element from the given column :rtype: Column \"\"\" index = (F.rand(seed) * F.size(col)).cast(\"int\") return col[index]","title":"array_choice()"},{"location":"reference/quinn/functions/#quinn.functions.business_days_between","text":"Function takes two Spark Columns and returns a Column with the number of business days between the start and the end date. 
Parameters: Name Type Description Default start_date Column The column with the start dates required end_date Column The column with the end dates required Returns: Type Description Column a Column with the number of business days between the start and the end date Source code in quinn/functions.py def business_days_between(start_date: Column, end_date: Column) -> Column: # noqa: ARG001 \"\"\"Function takes two Spark `Columns` and returns a `Column` with the number of business days between the start and the end date. :param start_date: The column with the start dates :type start_date: Column :param end_date: The column with the end dates :type end_date: Column :returns: a Column with the number of business days between the start and the end date :rtype: Column \"\"\" all_days = \"sequence(start_date, end_date)\" days_of_week = f\"transform({all_days}, x -> date_format(x, 'E'))\" filter_weekends = F.expr(f\"filter({days_of_week}, x -> x NOT IN ('Sat', 'Sun'))\") num_business_days = F.size(filter_weekends) - 1 return F.when(num_business_days < 0, None).otherwise(num_business_days)","title":"business_days_between()"},{"location":"reference/quinn/functions/#quinn.functions.exists","text":"Create a user-defined function. It takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] Callable function - A callable function that takes an element of type Any and returns a boolean value. required Returns: Type Description UserDefinedFunction A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument f of the exists() function. 
Source code in quinn/functions.py def exists(f: Callable[[Any], bool]) -> udf: \"\"\"Create a user-defined function. It takes a list expressed as a column of type ``ArrayType(AnyType)`` as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :param f: Callable function - A callable function that takes an element of type Any and returns a boolean value. :return: A user-defined function that takes a list expressed as a column of type ArrayType(AnyType) as an argument and returns a boolean value indicating whether any element in the list is true according to the argument ``f`` of the ``exists()`` function. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return any(map(f, list_)) return F.udf(temp_udf, BooleanType())","title":"exists()"},{"location":"reference/quinn/functions/#quinn.functions.forall","text":"The forall function allows for mapping a given boolean function to a list of arguments and return a single boolean value. It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. Parameters: Name Type Description Default f Callable [[ Any ], bool ] A callable function f which takes in any type and returns a boolean required Returns: Type Description UserDefinedFunction A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. Source code in quinn/functions.py def forall(f: Callable[[Any], bool]) -> udf: \"\"\"The **forall** function allows for mapping a given boolean function to a list of arguments and return a single boolean value. 
It does this by creating a Spark UDF which takes in a list of arguments, applying the given boolean function to each element of the list and returning a single boolean value if all the elements pass through the given boolean function. :param f: A callable function ``f`` which takes in any type and returns a boolean :return: A spark UDF which accepts a list of arguments and returns True if all elements pass through the given boolean function, False otherwise. :rtype: UserDefinedFunction \"\"\" def temp_udf(list_: list) -> bool: return all(map(f, list_)) return F.udf(temp_udf, BooleanType())","title":"forall()"},{"location":"reference/quinn/functions/#quinn.functions.multi_equals","text":"Create a user-defined function that checks if all the given columns have the designated value. Parameters: Name Type Description Default value Any The designated value. required Returns: Type Description UserDifinedFunction A user-defined function of type BooleanType(). Source code in quinn/functions.py def multi_equals(value: Any) -> udf: # noqa: ANN401 \"\"\"Create a user-defined function that checks if all the given columns have the designated value. :param value: The designated value. :type value: Any :return: A user-defined function of type BooleanType(). :rtype: UserDifinedFunction \"\"\" def temp_udf(*cols) -> bool: # noqa: ANN002 return all(map(lambda col: col == value, cols)) # noqa: C417 return F.udf(temp_udf, BooleanType())","title":"multi_equals()"},{"location":"reference/quinn/functions/#quinn.functions.regexp_extract_all","text":"Function uses the Python re library to extract regular expressions from a string ( s ) using a regex pattern ( regexp ). It returns a list of all matches, or None if s is None . 
Parameters: Name Type Description Default s Column input string ( Column ) required regexp Column string re pattern required Source code in quinn/functions.py @F.udf(returnType=ArrayType(StringType())) def regexp_extract_all(s: Column, regexp: Column) -> Column: \"\"\"Function uses the Python `re` library to extract regular expressions from a string (`s`) using a regex pattern (`regexp`). It returns a list of all matches, or `None` if `s` is `None`. :param s: input string (`Column`) :type s: str :param regexp: string `re` pattern :rtype: Column \"\"\" return None if s is None else re.findall(regexp, s)","title":"regexp_extract_all()"},{"location":"reference/quinn/functions/#quinn.functions.remove_all_whitespace","text":"Function takes a Column object as a parameter and returns a Column object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. Parameters: Name Type Description Default col Column a Column object required Returns: Type Description Column a Column object with all white space removed Source code in quinn/functions.py def remove_all_whitespace(col: Column) -> Column: \"\"\"Function takes a `Column` object as a parameter and returns a `Column` object with all white space removed. It does this using the regexp_replace function from F, which replaces all whitespace with an empty string. :param col: a `Column` object :type col: Column :returns: a `Column` object with all white space removed :rtype: Column \"\"\" return F.regexp_replace(col, \"\\\\s+\", \"\")","title":"remove_all_whitespace()"},{"location":"reference/quinn/functions/#quinn.functions.remove_non_word_characters","text":"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression \"[^\\\\w\\\\s]+\" . This expression represents any character that is not a word character (e.g. \\\\w ) or whitespace ( \\\\s ). 
Parameters: Name Type Description Default col Column A Column object. required Returns: Type Description Column A Column object with non-word characters removed. Source code in quinn/functions.py def remove_non_word_characters(col: Column) -> Column: r\"\"\"Removes non-word characters from a column. The non-word characters which will be removed are those identified by the regular expression ``\"[^\\\\w\\\\s]+\"``. This expression represents any character that is not a word character (e.g. `\\\\w`) or whitespace (`\\\\s`). :param col: A Column object. :return: A Column object with non-word characters removed. \"\"\" return F.regexp_replace(col, \"[^\\\\w\\\\s]+\", \"\")","title":"remove_non_word_characters()"},{"location":"reference/quinn/functions/#quinn.functions.single_space","text":"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. Parameters: Name Type Description Default col Column The column which needs to be spaced required Returns: Type Description Column A trimmed column with single space Source code in quinn/functions.py def single_space(col: Column) -> Column: \"\"\"Function takes a column and replaces all the multiple white spaces with a single space. It then trims the column to make all the texts consistent. :param col: The column which needs to be spaced :type col: Column :returns: A trimmed column with single space :rtype: Column \"\"\" return F.trim(F.regexp_replace(col, \" +\", \" \"))","title":"single_space()"},{"location":"reference/quinn/functions/#quinn.functions.uuid5","text":"Function generates UUIDv5 from col and namespace , optionally prepending an extra string to col . Sets variant to RFC 4122 one. Parameters: Name Type Description Default col Column Column that will be hashed. required namespace uuid . UUID Namespace to be used. 
(default: uuid.NAMESPACE_DNS ) uuid.NAMESPACE_DNS extra_string str In case of collisions one can pass an extra string to hash on. '' Returns: Type Description Column String representation of generated UUIDv5 Source code in quinn/functions.py def uuid5( col: Column, namespace: uuid.UUID = uuid.NAMESPACE_DNS, extra_string: str = \"\", ) -> Column: \"\"\"Function generates UUIDv5 from ``col`` and ``namespace``, optionally prepending an extra string to ``col``. Sets variant to RFC 4122 one. :param col: Column that will be hashed. :type col: Column :param namespace: Namespace to be used. (default: `uuid.NAMESPACE_DNS`) :type namespace: str :param extra_string: In case of collisions one can pass an extra string to hash on. :type extra_string: str :return: String representation of generated UUIDv5 :rtype: Column \"\"\" ns = F.lit(namespace.bytes) salted_col = F.concat(F.lit(extra_string), col) encoded = F.encode(salted_col, \"utf-8\") encoded_with_ns = F.concat(ns, encoded) hashed = F.sha1(encoded_with_ns) variant_part = F.substring(hashed, 17, 4) variant_part = F.conv(variant_part, 16, 2) variant_part = F.lpad(variant_part, 16, \"0\") variant_part = F.concat( F.lit(\"10\"), F.substring(variant_part, 3, 16), ) # RFC 4122 variant. variant_part = F.lower(F.conv(variant_part, 2, 16)) return F.concat_ws( \"-\", F.substring(hashed, 1, 8), F.substring(hashed, 9, 4), F.concat(F.lit(\"5\"), F.substring(hashed, 14, 3)), # Set version. variant_part, F.substring(hashed, 21, 12), )","title":"uuid5()"},{"location":"reference/quinn/functions/#quinn.functions.week_end_date","text":"Return a date column for the end of week for a given day. The Spark function dayofweek considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. 
Usage of the when and otherwise functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. Parameters: Name Type Description Default col Column The reference date column. required week_end_day str The week end day (default: 'Sat') 'Sat' Returns: Type Description Column A Column of end of the week dates. Source code in quinn/functions.py def week_end_date(col: Column, week_end_day: str = \"Sat\") -> Column: \"\"\"Return a date column for the end of week for a given day. The Spark function `dayofweek` considers Sunday as the first day of the week, and uses the default value of 1 to indicate Sunday. Usage of the `when` and `otherwise` functions allow a comparison between the end of week day indicated and the day of week computed, and the return of the reference date if they match or the the addition of one week to the reference date otherwise. :param col: The reference date column. :type col: Column :param week_end_day: The week end day (default: 'Sat') :type week_end_day: str :return: A Column of end of the week dates. :rtype: Column \"\"\" _raise_if_invalid_day(week_end_day) # these are the default Spark mappings. Spark considers Sunday the first day of the week. day_of_week_mapping = { \"Sun\": 1, \"Mon\": 2, \"Tue\": 3, \"Wed\": 4, \"Thu\": 5, \"Fri\": 6, \"Sat\": 7, } return F.when( F.dayofweek(col).eqNullSafe(F.lit(day_of_week_mapping[week_end_day])), col, ).otherwise(F.next_day(col, week_end_day))","title":"week_end_date()"},{"location":"reference/quinn/functions/#quinn.functions.week_start_date","text":"Function takes a Spark Column and an optional week_start_day argument and returns a Column with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. 
The week_start_day argument is a string corresponding to the day of the week to start the week from, e.g. \"Mon\" , \"Tue\" , and must be in the set: {\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"} . If the argument given is not a valid day then a ValueError will be raised. Parameters: Name Type Description Default col Column The column to determine start of week dates on required week_start_day str The day to start the week on 'Sun' Returns: Type Description Column A Column with start of week dates Source code in quinn/functions.py def week_start_date(col: Column, week_start_day: str = \"Sun\") -> Column: \"\"\"Function takes a Spark `Column` and an optional `week_start_day` argument and returns a `Column` with the corresponding start of week dates. The \"standard week\" in Spark starts on Sunday, however an optional argument can be used to start the week from a different day, e.g. Monday. The `week_start_day` argument is a string corresponding to the day of the week to start the week from, e.g. `\"Mon\"`, `\"Tue\"`, and must be in the set: `{\"Sun\", \"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\"}`. If the argument given is not a valid day then a `ValueError` will be raised. :param col: The column to determine start of week dates on :type col: Column :param week_start_day: The day to start the week on :type week_start_day: str :returns: A Column with start of week dates :rtype: Column \"\"\" _raise_if_invalid_day(week_start_day) # the \"standard week\" in Spark is from Sunday to Saturday mapping = { \"Sun\": \"Sat\", \"Mon\": \"Sun\", \"Tue\": \"Mon\", \"Wed\": \"Tue\", \"Thu\": \"Wed\", \"Fri\": \"Thu\", \"Sat\": \"Fri\", } end = week_end_date(col, mapping[week_start_day]) return F.date_add(end, -6)","title":"week_start_date()"},{"location":"reference/quinn/schema_helpers/","text":"complex_fields(schema) Returns a dictionary of complex field names and their data types from the input DataFrame's schema. 
Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required Returns: Type Description Dict[str, object] A dictionary with complex field names as keys and their respective data types as values. Source code in quinn/schema_helpers.py def complex_fields(schema: T.StructType) -> dict[str, object]: \"\"\"Returns a dictionary of complex field names and their data types from the input DataFrame's schema. :param df: The input PySpark DataFrame. :type df: DataFrame :return: A dictionary with complex field names as keys and their respective data types as values. :rtype: Dict[str, object] \"\"\" return { field.name: field.dataType for field in schema.fields if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType)) } print_schema_as_code(dtype) Represent DataType (including StructType) as valid Python code. Parameters: Name Type Description Default dtype T . DataType The input DataType or Schema object required Returns: Type Description str A valid python code which generate the same schema. Source code in quinn/schema_helpers.py def print_schema_as_code(dtype: T.DataType) -> str: \"\"\"Represent DataType (including StructType) as valid Python code. :param dtype: The input DataType or Schema object :type dtype: pyspark.sql.types.DataType :return: A valid python code which generate the same schema. 
:rtype: str \"\"\" res = [] if isinstance(dtype, T.StructType): res.append(\"StructType(\\n\\tfields=[\") for field in dtype.fields: for line in _repr_column(field).split(\"\\n\"): res.append(\"\\n\\t\\t\") res.append(line) res.append(\",\") res.append(\"\\n\\t]\\n)\") elif isinstance(dtype, T.ArrayType): res.append(\"ArrayType(\") res.append(print_schema_as_code(dtype.elementType)) res.append(\")\") elif isinstance(dtype, T.MapType): res.append(\"MapType(\") res.append(f\"\\n\\t{print_schema_as_code(dtype.keyType)},\") for line in print_schema_as_code(dtype.valueType).split(\"\\n\"): res.append(\"\\n\\t\") res.append(line) res.append(\",\") res.append(f\"\\n\\t{dtype.valueContainsNull},\") res.append(\"\\n)\") elif isinstance(dtype, T.DecimalType): res.append(f\"DecimalType({dtype.precision}, {dtype.scale})\") elif str(dtype).endswith(\"()\"): # PySpark 3.3+ res.append(str(dtype)) else: res.append(f\"{dtype}()\") return \"\".join(res) schema_from_csv(spark, file_path) Return a StructType from a CSV file containing schema configuration. Parameters: Name Type Description Default spark SparkSession The SparkSession object required file_path str The path to the CSV file containing the schema configuration required Returns: Type Description pyspark.sql.types.StructType A StructType object representing the schema configuration Raises: Type Description ValueError If the CSV file does not contain the expected columns: name, type, nullable, description Source code in quinn/schema_helpers.py def schema_from_csv(spark: SparkSession, file_path: str) -> T.StructType: # noqa: C901 \"\"\"Return a StructType from a CSV file containing schema configuration. 
:param spark: The SparkSession object :type spark: pyspark.sql.session.SparkSession :param file_path: The path to the CSV file containing the schema configuration :type file_path: str :raises ValueError: If the CSV file does not contain the expected columns: name, type, nullable, description :return: A StructType object representing the schema configuration :rtype: pyspark.sql.types.StructType \"\"\" def _validate_json(metadata: str) -> dict: if metadata is None: return {} try: metadata_dict = json.loads(metadata) except json.JSONDecodeError as exc: msg = f\"Invalid JSON: {metadata}\" raise ValueError(msg) from exc return metadata_dict def _lookup_type(type_str: str) -> T.DataType: type_lookup = { \"string\": T.StringType(), \"int\": T.IntegerType(), \"float\": T.FloatType(), \"double\": T.DoubleType(), \"boolean\": T.BooleanType(), \"bool\": T.BooleanType(), \"timestamp\": T.TimestampType(), \"date\": T.DateType(), \"binary\": T.BinaryType(), } if type_str not in type_lookup: msg = f\"Invalid type: {type_str}. Expecting one of: {type_lookup.keys()}\" raise ValueError(msg) return type_lookup[type_str] def _convert_nullable(null_str: str) -> bool: if null_str is None: return True parsed_val = null_str.lower() if parsed_val not in [\"true\", \"false\"]: msg = f\"Invalid nullable value: {null_str}. 
Expecting True or False.\" raise ValueError(msg) return parsed_val == \"true\" schema_df = spark.read.csv(file_path, header=True) possible_columns = [\"name\", \"type\", \"nullable\", \"metadata\"] num_cols = len(schema_df.columns) expected_columns = possible_columns[0:num_cols] # ensure that csv contains the expected columns: name, type, nullable, description if schema_df.columns != expected_columns: msg = f\"CSV must contain columns in this order: {expected_columns}\" raise ValueError(msg) # create a StructType per field fields = [] for row in schema_df.collect(): field = T.StructField( name=row[\"name\"], dataType=_lookup_type(row[\"type\"]), nullable=_convert_nullable(row[\"nullable\"]) if \"nullable\" in row else True, metadata=_validate_json(row[\"metadata\"] if \"metadata\" in row else None), ) fields.append(field) return T.StructType(fields=fields)","title":"Schema helpers"},{"location":"reference/quinn/schema_helpers/#quinn.schema_helpers.complex_fields","text":"Returns a dictionary of complex field names and their data types from the input DataFrame's schema. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required Returns: Type Description Dict[str, object] A dictionary with complex field names as keys and their respective data types as values. Source code in quinn/schema_helpers.py def complex_fields(schema: T.StructType) -> dict[str, object]: \"\"\"Returns a dictionary of complex field names and their data types from the input DataFrame's schema. :param df: The input PySpark DataFrame. :type df: DataFrame :return: A dictionary with complex field names as keys and their respective data types as values. 
:rtype: Dict[str, object] \"\"\" return { field.name: field.dataType for field in schema.fields if isinstance(field.dataType, (T.ArrayType, T.StructType, T.MapType)) }","title":"complex_fields()"},{"location":"reference/quinn/schema_helpers/#quinn.schema_helpers.print_schema_as_code","text":"Represent DataType (including StructType) as valid Python code. Parameters: Name Type Description Default dtype T . DataType The input DataType or Schema object required Returns: Type Description str A valid python code which generate the same schema. Source code in quinn/schema_helpers.py def print_schema_as_code(dtype: T.DataType) -> str: \"\"\"Represent DataType (including StructType) as valid Python code. :param dtype: The input DataType or Schema object :type dtype: pyspark.sql.types.DataType :return: A valid python code which generate the same schema. :rtype: str \"\"\" res = [] if isinstance(dtype, T.StructType): res.append(\"StructType(\\n\\tfields=[\") for field in dtype.fields: for line in _repr_column(field).split(\"\\n\"): res.append(\"\\n\\t\\t\") res.append(line) res.append(\",\") res.append(\"\\n\\t]\\n)\") elif isinstance(dtype, T.ArrayType): res.append(\"ArrayType(\") res.append(print_schema_as_code(dtype.elementType)) res.append(\")\") elif isinstance(dtype, T.MapType): res.append(\"MapType(\") res.append(f\"\\n\\t{print_schema_as_code(dtype.keyType)},\") for line in print_schema_as_code(dtype.valueType).split(\"\\n\"): res.append(\"\\n\\t\") res.append(line) res.append(\",\") res.append(f\"\\n\\t{dtype.valueContainsNull},\") res.append(\"\\n)\") elif isinstance(dtype, T.DecimalType): res.append(f\"DecimalType({dtype.precision}, {dtype.scale})\") elif str(dtype).endswith(\"()\"): # PySpark 3.3+ res.append(str(dtype)) else: res.append(f\"{dtype}()\") return \"\".join(res)","title":"print_schema_as_code()"},{"location":"reference/quinn/schema_helpers/#quinn.schema_helpers.schema_from_csv","text":"Return a StructType from a CSV file containing schema 
configuration. Parameters: Name Type Description Default spark SparkSession The SparkSession object required file_path str The path to the CSV file containing the schema configuration required Returns: Type Description pyspark.sql.types.StructType A StructType object representing the schema configuration Raises: Type Description ValueError If the CSV file does not contain the expected columns: name, type, nullable, description Source code in quinn/schema_helpers.py def schema_from_csv(spark: SparkSession, file_path: str) -> T.StructType: # noqa: C901 \"\"\"Return a StructType from a CSV file containing schema configuration. :param spark: The SparkSession object :type spark: pyspark.sql.session.SparkSession :param file_path: The path to the CSV file containing the schema configuration :type file_path: str :raises ValueError: If the CSV file does not contain the expected columns: name, type, nullable, description :return: A StructType object representing the schema configuration :rtype: pyspark.sql.types.StructType \"\"\" def _validate_json(metadata: str) -> dict: if metadata is None: return {} try: metadata_dict = json.loads(metadata) except json.JSONDecodeError as exc: msg = f\"Invalid JSON: {metadata}\" raise ValueError(msg) from exc return metadata_dict def _lookup_type(type_str: str) -> T.DataType: type_lookup = { \"string\": T.StringType(), \"int\": T.IntegerType(), \"float\": T.FloatType(), \"double\": T.DoubleType(), \"boolean\": T.BooleanType(), \"bool\": T.BooleanType(), \"timestamp\": T.TimestampType(), \"date\": T.DateType(), \"binary\": T.BinaryType(), } if type_str not in type_lookup: msg = f\"Invalid type: {type_str}. Expecting one of: {type_lookup.keys()}\" raise ValueError(msg) return type_lookup[type_str] def _convert_nullable(null_str: str) -> bool: if null_str is None: return True parsed_val = null_str.lower() if parsed_val not in [\"true\", \"false\"]: msg = f\"Invalid nullable value: {null_str}. 
Expecting True or False.\" raise ValueError(msg) return parsed_val == \"true\" schema_df = spark.read.csv(file_path, header=True) possible_columns = [\"name\", \"type\", \"nullable\", \"metadata\"] num_cols = len(schema_df.columns) expected_columns = possible_columns[0:num_cols] # ensure that csv contains the expected columns: name, type, nullable, description if schema_df.columns != expected_columns: msg = f\"CSV must contain columns in this order: {expected_columns}\" raise ValueError(msg) # create a StructType per field fields = [] for row in schema_df.collect(): field = T.StructField( name=row[\"name\"], dataType=_lookup_type(row[\"type\"]), nullable=_convert_nullable(row[\"nullable\"]) if \"nullable\" in row else True, metadata=_validate_json(row[\"metadata\"] if \"metadata\" in row else None), ) fields.append(field) return T.StructType(fields=fields)","title":"schema_from_csv()"},{"location":"reference/quinn/spark/","text":"SparkProvider Class for creating and destroying SparkSession. 
Source code in quinn/spark.py class SparkProvider: \"\"\"Class for creating and destroying SparkSession.\"\"\" def __init__( self: SparkProvider, app_name: str, conf: SparkConf | None = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> None: \"\"\"Initialize SparkSession.\"\"\" self.spark = self.set_up_spark( app_name, self.master, conf, extra_dependencies, extra_files, ) @property def master(self: SparkProvider) -> str: # noqa: D102 return os.getenv(\"SPARK_MASTER\", STANDALONE) @staticmethod def set_up_spark( # noqa: D102 app_name: str, master: str = STANDALONE, conf: SparkConf = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> SparkSession: conf = conf if conf else SparkConf() if extra_dependencies: spark_dependencies = \",\".join(extra_dependencies) conf.set(\"spark.jars.packages\", spark_dependencies) spark = ( SparkSession.builder.appName(app_name) .master(master) .config(conf=conf) .getOrCreate() ) extra_files = extra_files if extra_files else [] for extra_file in extra_files: spark.sparkContext.addPyFile(extra_file) quiet_py4j() return spark @staticmethod def tear_down_spark(spark: SparkSession) -> None: # noqa: D102 spark.stop() # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown spark._jvm.System.clearProperty(\"spark.driver.port\") # noqa: SLF001 __init__(app_name, conf=None, extra_dependencies=None, extra_files=None) Initialize SparkSession. Source code in quinn/spark.py def __init__( self: SparkProvider, app_name: str, conf: SparkConf | None = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> None: \"\"\"Initialize SparkSession.\"\"\" self.spark = self.set_up_spark( app_name, self.master, conf, extra_dependencies, extra_files, ) quiet_py4j() Sets logging level of py4h. 
Source code in quinn/spark.py def quiet_py4j() -> None: \"\"\"Sets logging level of py4h.\"\"\" logger = logging.getLogger(\"py4j\") logger.setLevel(logging.INFO)","title":"Spark"},{"location":"reference/quinn/spark/#quinn.spark.SparkProvider","text":"Class for creating and destroying SparkSession. Source code in quinn/spark.py class SparkProvider: \"\"\"Class for creating and destroying SparkSession.\"\"\" def __init__( self: SparkProvider, app_name: str, conf: SparkConf | None = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> None: \"\"\"Initialize SparkSession.\"\"\" self.spark = self.set_up_spark( app_name, self.master, conf, extra_dependencies, extra_files, ) @property def master(self: SparkProvider) -> str: # noqa: D102 return os.getenv(\"SPARK_MASTER\", STANDALONE) @staticmethod def set_up_spark( # noqa: D102 app_name: str, master: str = STANDALONE, conf: SparkConf = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> SparkSession: conf = conf if conf else SparkConf() if extra_dependencies: spark_dependencies = \",\".join(extra_dependencies) conf.set(\"spark.jars.packages\", spark_dependencies) spark = ( SparkSession.builder.appName(app_name) .master(master) .config(conf=conf) .getOrCreate() ) extra_files = extra_files if extra_files else [] for extra_file in extra_files: spark.sparkContext.addPyFile(extra_file) quiet_py4j() return spark @staticmethod def tear_down_spark(spark: SparkSession) -> None: # noqa: D102 spark.stop() # To avoid Akka rebinding to the same port, since it doesn't unbind # immediately on shutdown spark._jvm.System.clearProperty(\"spark.driver.port\") # noqa: SLF001","title":"SparkProvider"},{"location":"reference/quinn/spark/#quinn.spark.SparkProvider.__init__","text":"Initialize SparkSession. 
Source code in quinn/spark.py def __init__( self: SparkProvider, app_name: str, conf: SparkConf | None = None, extra_dependencies: list[str] | None = None, extra_files: list[str] | None = None, ) -> None: \"\"\"Initialize SparkSession.\"\"\" self.spark = self.set_up_spark( app_name, self.master, conf, extra_dependencies, extra_files, )","title":"__init__()"},{"location":"reference/quinn/spark/#quinn.spark.quiet_py4j","text":"Sets logging level of py4h. Source code in quinn/spark.py def quiet_py4j() -> None: \"\"\"Sets logging level of py4h.\"\"\" logger = logging.getLogger(\"py4j\") logger.setLevel(logging.INFO)","title":"quiet_py4j()"},{"location":"reference/quinn/split_columns/","text":"split_col(df, col_name, delimiter, new_col_names, mode='permissive', default=None) Splits the given column based on the delimiter and creates new columns with the split values. Parameters: Name Type Description Default df DataFrame The input DataFrame required col_name str The name of the column to split required delimiter str The delimiter to split the column on required new_col_names list [ str ] A list of two strings for the new column names required mode str The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" 'permissive' default Optional [ str ] If the mode is \"permissive\" then default value will be assigned to column None Returns: Type Description pyspark.sql.DataFrame. dataframe: The resulting DataFrame with the split columns Source code in quinn/split_columns.py def split_col( # noqa: PLR0913 df: DataFrame, col_name: str, delimiter: str, new_col_names: list[str], mode: str = \"permissive\", default: Optional[str] = None, ) -> DataFrame: \"\"\"Splits the given column based on the delimiter and creates new columns with the split values. 
:param df: The input DataFrame :type df: pyspark.sql.DataFrame :param col_name: The name of the column to split :type col_name: str :param delimiter: The delimiter to split the column on :type delimiter: str :param new_col_names: A list of two strings for the new column names :type new_col_names: (List[str]) :param mode: The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" :type mode: str :param default: If the mode is \"permissive\" then default value will be assigned to column :type mode: str :return: dataframe: The resulting DataFrame with the split columns :rtype: pyspark.sql.DataFrame. \"\"\" # Check if the column to be split exists in the DataFrame if col_name not in df.columns: msg = f\"Column '{col_name}' not found in DataFrame.\" raise ValueError(msg) # Check if the delimiter is a string if not isinstance(delimiter, str): msg = \"Delimiter must be a string.\" raise TypeError(msg) # Check if the new column names are a list of strings if not isinstance(new_col_names, list): msg = \"New column names must be a list of strings.\" raise TypeError(msg) # Define a UDF to check the occurrence of delimitter def _num_delimiter(col_value1: str) -> int: # Get the count of delimiter and store the result in no_of_delimiter no_of_delimiter = col_value1.count(delimiter) # Split col_value based on delimiter and store the result in split_value split_value = col_value1.split(delimiter) # Check if col_value is not None if col_value1 is not None: # Check if the no of delimiters in split_value is not as expected if no_of_delimiter != len(new_col_names) - 1: # If the length is not same, raise an IndexError with the message mentioning the expected and found length msg = f\"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements\" raise IndexError( msg, ) # If the length of split_value is same as new_col_names, check if any of the split values is None or empty string elif any( # noqa: RET506 x is None or 
x.strip() == \"\" for x in split_value[: len(new_col_names)] ): msg = \"Null or empty values are not accepted for columns in strict mode\" raise ValueError( msg, ) # If the above checks pass, return the count of delimiter return int(no_of_delimiter) # If col_value is None, return 0 return 0 num_udf = udf(lambda y: None if y is None else _num_delimiter(y), IntegerType()) # Get the column expression for the column to be split col_expr = df[col_name] # Split the column by the delimiter split_col_expr = split(trim(col_expr), delimiter) # Check the split mode if mode == \"strict\": # Create an array of select expressions to create new columns from the split values select_exprs = [ when(split_col_expr.getItem(i) != \"\", split_col_expr.getItem(i)).alias( new_col_names[i], ) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns df = df.select(\"*\", *select_exprs) # noqa: PD901 df = df.withColumn(\"del_length\", num_udf(df[col_name])) # noqa: PD901 df.cache() # Drop the original column if the new columns were created successfully df = df.select([c for c in df.columns if c not in {\"del_length\", col_name}]) # noqa: PD901 elif mode == \"permissive\": # Create an array of select expressions to create new columns from the split values # Use the default value if a split value is missing or empty select_exprs = select_exprs = [ when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i)) .otherwise(default) .alias(new_col_names[i]) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns # Drop the original column if the new columns were created successfully df = df.select(\"*\", *select_exprs).drop(col_name) # noqa: PD901 df.cache() else: msg = f\"Invalid mode: {mode}\" raise ValueError(msg) # Return the DataFrame with the split columns return df","title":"Split 
columns"},{"location":"reference/quinn/split_columns/#quinn.split_columns.split_col","text":"Splits the given column based on the delimiter and creates new columns with the split values. Parameters: Name Type Description Default df DataFrame The input DataFrame required col_name str The name of the column to split required delimiter str The delimiter to split the column on required new_col_names list [ str ] A list of two strings for the new column names required mode str The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" 'permissive' default Optional [ str ] If the mode is \"permissive\" then default value will be assigned to column None Returns: Type Description pyspark.sql.DataFrame. dataframe: The resulting DataFrame with the split columns Source code in quinn/split_columns.py def split_col( # noqa: PLR0913 df: DataFrame, col_name: str, delimiter: str, new_col_names: list[str], mode: str = \"permissive\", default: Optional[str] = None, ) -> DataFrame: \"\"\"Splits the given column based on the delimiter and creates new columns with the split values. :param df: The input DataFrame :type df: pyspark.sql.DataFrame :param col_name: The name of the column to split :type col_name: str :param delimiter: The delimiter to split the column on :type delimiter: str :param new_col_names: A list of two strings for the new column names :type new_col_names: (List[str]) :param mode: The split mode. Can be \"strict\" or \"permissive\". Default is \"permissive\" :type mode: str :param default: If the mode is \"permissive\" then default value will be assigned to column :type mode: str :return: dataframe: The resulting DataFrame with the split columns :rtype: pyspark.sql.DataFrame. 
\"\"\" # Check if the column to be split exists in the DataFrame if col_name not in df.columns: msg = f\"Column '{col_name}' not found in DataFrame.\" raise ValueError(msg) # Check if the delimiter is a string if not isinstance(delimiter, str): msg = \"Delimiter must be a string.\" raise TypeError(msg) # Check if the new column names are a list of strings if not isinstance(new_col_names, list): msg = \"New column names must be a list of strings.\" raise TypeError(msg) # Define a UDF to check the occurrence of delimitter def _num_delimiter(col_value1: str) -> int: # Get the count of delimiter and store the result in no_of_delimiter no_of_delimiter = col_value1.count(delimiter) # Split col_value based on delimiter and store the result in split_value split_value = col_value1.split(delimiter) # Check if col_value is not None if col_value1 is not None: # Check if the no of delimiters in split_value is not as expected if no_of_delimiter != len(new_col_names) - 1: # If the length is not same, raise an IndexError with the message mentioning the expected and found length msg = f\"Expected {len(new_col_names)} elements after splitting on delimiter, found {len(split_value)} elements\" raise IndexError( msg, ) # If the length of split_value is same as new_col_names, check if any of the split values is None or empty string elif any( # noqa: RET506 x is None or x.strip() == \"\" for x in split_value[: len(new_col_names)] ): msg = \"Null or empty values are not accepted for columns in strict mode\" raise ValueError( msg, ) # If the above checks pass, return the count of delimiter return int(no_of_delimiter) # If col_value is None, return 0 return 0 num_udf = udf(lambda y: None if y is None else _num_delimiter(y), IntegerType()) # Get the column expression for the column to be split col_expr = df[col_name] # Split the column by the delimiter split_col_expr = split(trim(col_expr), delimiter) # Check the split mode if mode == \"strict\": # Create an array of select expressions to 
create new columns from the split values select_exprs = [ when(split_col_expr.getItem(i) != \"\", split_col_expr.getItem(i)).alias( new_col_names[i], ) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns df = df.select(\"*\", *select_exprs) # noqa: PD901 df = df.withColumn(\"del_length\", num_udf(df[col_name])) # noqa: PD901 df.cache() # Drop the original column if the new columns were created successfully df = df.select([c for c in df.columns if c not in {\"del_length\", col_name}]) # noqa: PD901 elif mode == \"permissive\": # Create an array of select expressions to create new columns from the split values # Use the default value if a split value is missing or empty select_exprs = select_exprs = [ when(length(split_col_expr.getItem(i)) > 0, split_col_expr.getItem(i)) .otherwise(default) .alias(new_col_names[i]) for i in range(len(new_col_names)) ] # Select all the columns from the input DataFrame, along with the new split columns # Drop the original column if the new columns were created successfully df = df.select(\"*\", *select_exprs).drop(col_name) # noqa: PD901 df.cache() else: msg = f\"Invalid mode: {mode}\" raise ValueError(msg) # Return the DataFrame with the split columns return df","title":"split_col()"},{"location":"reference/quinn/transformations/","text":"flatten_dataframe(df, separator=':', replace_char='_', sanitized_columns=False) Flattens the complex columns in the DataFrame. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required separator str The separator to use in the resulting flattened column names, defaults to \":\". ':' replace_char str The character to replace special characters with in column names, defaults to \"_\". '_' sanitized_columns bool Whether to sanitize column names, defaults to False. False Returns: Type Description DataFrame .. note:: This function assumes the input DataFrame has a consistent schema across all rows. 
If you have files with different schemas, process each separately instead. .. example:: Example usage: >>> data = [ ( 1, (\"Alice\", 25), {\"A\": 100, \"B\": 200}, [\"apple\", \"banana\"], {\"key\": {\"nested_key\": 10}}, {\"A#\": 1000, \"B@\": 2000}, ), ( 2, (\"Bob\", 30), {\"A\": 150, \"B\": 250}, [\"orange\", \"grape\"], {\"key\": {\"nested_key\": 20}}, {\"A#\": 1500, \"B@\": 2500}, ), ] >>> df = spark.createDataFrame(data) >>> flattened_df = flatten_dataframe(df) >>> flattened_df.show() >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char=\"-\") >>> flattened_df_with_hyphen.show() The DataFrame with all complex data types flattened. Source code in quinn/transformations.py def flatten_dataframe( df: DataFrame, separator: str = \":\", replace_char: str = \"_\", sanitized_columns: bool = False, ) -> DataFrame: \"\"\"Flattens the complex columns in the DataFrame. :param df: The input PySpark DataFrame. :type df: DataFrame :param separator: The separator to use in the resulting flattened column names, defaults to \":\". :type separator: str, optional :param replace_char: The character to replace special characters with in column names, defaults to \"_\". :type replace_char: str, optional :param sanitized_columns: Whether to sanitize column names, defaults to False. :type sanitized_columns: bool, optional :return: The DataFrame with all complex data types flattened. :rtype: DataFrame .. note:: This function assumes the input DataFrame has a consistent schema across all rows. If you have files with different schemas, process each separately instead. .. 
example:: Example usage: >>> data = [ ( 1, (\"Alice\", 25), {\"A\": 100, \"B\": 200}, [\"apple\", \"banana\"], {\"key\": {\"nested_key\": 10}}, {\"A#\": 1000, \"B@\": 2000}, ), ( 2, (\"Bob\", 30), {\"A\": 150, \"B\": 250}, [\"orange\", \"grape\"], {\"key\": {\"nested_key\": 20}}, {\"A#\": 1500, \"B@\": 2500}, ), ] >>> df = spark.createDataFrame(data) >>> flattened_df = flatten_dataframe(df) >>> flattened_df.show() >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char=\"-\") >>> flattened_df_with_hyphen.show() \"\"\" def sanitize_column_name(name: str, rc: str = \"_\") -> str: \"\"\"Sanitizes column names by replacing special characters with the specified character. :param name: The original column name. :type name: str :param rc: The character to replace special characters with, defaults to '_'. :type rc: str, optional :return: The sanitized column name. :rtype: str \"\"\" return re.sub(r\"[^a-zA-Z0-9_]\", rc, name) def explode_array(df: DataFrame, col_name: str) -> DataFrame: \"\"\"Explodes the specified ArrayType column in the input DataFrame and returns a new DataFrame with the exploded column. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the ArrayType to be exploded. :type col_name: str :return: The DataFrame with the exploded ArrayType column. 
:rtype: DataFrame \"\"\" return df.select( \"*\", F.explode_outer(F.col(f\"`{col_name}`\")).alias(col_name), ).drop( col_name, ) fields = complex_fields(df.schema) while len(fields) != 0: col_name = next(iter(fields.keys())) if isinstance(fields[col_name], StructType): df = flatten_struct(df, col_name, separator) # noqa: PD901 elif isinstance(fields[col_name], ArrayType): df = explode_array(df, col_name) # noqa: PD901 elif isinstance(fields[col_name], MapType): df = flatten_map(df, col_name, separator) # noqa: PD901 fields = complex_fields(df.schema) # Sanitize column names with the specified replace_char if sanitized_columns: sanitized_columns = [ sanitize_column_name(col_name, replace_char) for col_name in df.columns ] df = df.toDF(*sanitized_columns) # noqa: PD901 return df flatten_map(df, col_name, separator=':') Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required col_name str The column name of the MapType to be flattened. required separator str The separator to use in the resulting flattened column names, defaults to \":\". ':' Returns: Type Description DataFrame The DataFrame with the flattened MapType column. Source code in quinn/transformations.py def flatten_map(df: DataFrame, col_name: str, separator: str = \":\") -> DataFrame: \"\"\"Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the MapType to be flattened. :type col_name: str :param separator: The separator to use in the resulting flattened column names, defaults to \":\". :type separator: str, optional :return: The DataFrame with the flattened MapType column. 
:rtype: DataFrame \"\"\" keys_df = df.select(F.explode_outer(F.map_keys(F.col(f\"`{col_name}`\")))).distinct() keys = [row[0] for row in keys_df.collect()] key_cols = [ F.col(f\"`{col_name}`\").getItem(k).alias(col_name + separator + k) for k in keys ] return df.select( [F.col(f\"`{col}`\") for col in df.columns if col != col_name] + key_cols, ) flatten_struct(df, col_name, separator=':') Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required col_name str The column name of the StructType to be flattened. required separator str The separator to use in the resulting flattened column names, defaults to ':'. ':' Returns: Type Description List[Column] The DataFrame with the flattened StructType column. Source code in quinn/transformations.py def flatten_struct(df: DataFrame, col_name: str, separator: str = \":\") -> DataFrame: \"\"\"Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the StructType to be flattened. :type col_name: str :param separator: The separator to use in the resulting flattened column names, defaults to ':'. :type separator: str, optional :return: The DataFrame with the flattened StructType column. :rtype: List[Column] \"\"\" struct_type = complex_fields(df.schema)[col_name] expanded = [ F.col(f\"`{col_name}`.`{k}`\").alias(col_name + separator + k) for k in [n.name for n in struct_type.fields] ] return df.select(\"*\", *expanded).drop(F.col(f\"`{col_name}`\")) snake_case_col_names(df) Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case. (e.g. col_name_1 ). It uses the to_snake_case function in conjunction with the with_columns_renamed function to achieve this. 
Parameters: Name Type Description Default df DataFrame A DataFrame instance to process required Returns: Type Description ``DataFrame``. A DataFrame instance with column names converted to snake case Source code in quinn/transformations.py def snake_case_col_names(df: DataFrame) -> DataFrame: \"\"\"Function takes a ``DataFrame`` instance and returns the same ``DataFrame`` instance with all column names converted to snake case. (e.g. ``col_name_1``). It uses the ``to_snake_case`` function in conjunction with the ``with_columns_renamed`` function to achieve this. :param df: A ``DataFrame`` instance to process :type df: ``DataFrame`` :return: A ``DataFrame`` instance with column names converted to snake case :rtype: ``DataFrame``. \"\"\" return with_columns_renamed(to_snake_case)(df) sort_columns(df, sort_order, sort_nested=False) This function sorts the columns of a given DataFrame based on a given sort order. The sort_order parameter can either be asc or desc , which correspond to ascending and descending order, respectively. If any other value is provided for the sort_order parameter, a ValueError will be raised. Parameters: Name Type Description Default df DataFrame A DataFrame required sort_order str The order in which to sort the columns in the DataFrame required sort_nested bool Whether to sort nested structs or not. Defaults to false. False Returns: Type Description pyspark.sql.DataFrame A DataFrame with the columns sorted in the chosen order Source code in quinn/transformations.py def sort_columns( # noqa: C901,PLR0915 df: DataFrame, sort_order: str, sort_nested: bool = False, ) -> DataFrame: \"\"\"This function sorts the columns of a given DataFrame based on a given sort order. The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to ascending and descending order, respectively. If any other value is provided for the ``sort_order`` parameter, a ``ValueError`` will be raised. 
:param df: A DataFrame :type df: pyspark.sql.DataFrame :param sort_order: The order in which to sort the columns in the DataFrame :type sort_order: str :param sort_nested: Whether to sort nested structs or not. Defaults to false. :type sort_nested: bool :return: A DataFrame with the columns sorted in the chosen order :rtype: pyspark.sql.DataFrame \"\"\" def sort_nested_cols(schema, is_reversed, base_field=\"\") -> list[str]: # noqa: ANN001 # recursively check nested fields and sort them # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark # Credits: @pault for logic def parse_fields( fields_to_sort: list, parent_struct, is_reversed: bool, # noqa: ANN001 ) -> list: sorted_fields: list = sorted( fields_to_sort, key=lambda x: x[\"name\"], reverse=is_reversed, ) results = [] for field in sorted_fields: new_struct = StructType([StructField.fromJson(field)]) new_base_field = parent_struct.name if base_field: new_base_field = base_field + \".\" + new_base_field results.extend( sort_nested_cols(new_struct, is_reversed, base_field=new_base_field), ) return results select_cols = [] for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed): field_type = parent_struct.dataType if isinstance(field_type, ArrayType): array_parent = parent_struct.jsonValue()[\"type\"][\"elementType\"] base_str = f\"transform({parent_struct.name}\" suffix_str = f\") AS {parent_struct.name}\" # if struct in array, create mapping to struct if array_parent[\"type\"] == \"struct\": array_parent = array_parent[\"fields\"] base_str = f\"{base_str}, x -> struct(\" suffix_str = f\"){suffix_str}\" array_elements = parse_fields(array_parent, parent_struct, is_reversed) element_names = [i.split(\".\")[-1] for i in array_elements] array_elements_formatted = [f\"x.{i} as {i}\" for i in element_names] # create a string representation of the sorted array # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS 
phone_numbers result = f\"{base_str}{', '.join(array_elements_formatted)}{suffix_str}\" elif isinstance(field_type, StructType): field_list = parent_struct.jsonValue()[\"type\"][\"fields\"] sub_fields = parse_fields(field_list, parent_struct, is_reversed) # create a string representation of the sorted struct # ex: struct(address.zip.first5, address.zip.last4) AS zip result = f\"struct({', '.join(sub_fields)}) AS {parent_struct.name}\" elif base_field: result = f\"{base_field}.{parent_struct.name}\" else: result = parent_struct.name select_cols.append(result) return select_cols def get_original_nullability(field: StructField, result_dict: dict) -> None: if hasattr(field, \"nullable\"): result_dict[field.name] = field.nullable else: result_dict[field.name] = True if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): result_dict[f\"{field.name}_element\"] = field.dataType.containsNull children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: get_original_nullability(i, result_dict) def fix_nullability(field: StructField, result_dict: dict) -> None: field.nullable = result_dict[field.name] if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): # save the containsNull property of the ArrayType field.dataType.containsNull = result_dict[f\"{field.name}_element\"] children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: fix_nullability(i, result_dict) if sort_order not in [\"asc\", \"desc\"]: msg = f\"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'\" raise ValueError( msg, ) reverse_lookup = { \"asc\": False, \"desc\": True, } is_reversed: bool = reverse_lookup[sort_order] top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed)) if not 
sort_nested: return top_level_sorted_df is_nested: bool = any( isinstance(i.dataType, (StructType, ArrayType)) for i in top_level_sorted_df.schema ) if not is_nested: return top_level_sorted_df fully_sorted_schema = sort_nested_cols(top_level_sorted_df.schema, is_reversed) output = df.selectExpr(fully_sorted_schema) result_dict = {} for field in df.schema: get_original_nullability(field, result_dict) for field in output.schema: fix_nullability(field, result_dict) if not hasattr(SparkSession, \"getActiveSession\"): # spark 2.4 spark = SparkSession.builder.getOrCreate() else: spark = SparkSession.getActiveSession() spark = spark if spark is not None else SparkSession.builder.getOrCreate() return spark.createDataFrame(output.rdd, output.schema) to_snake_case(s) Takes a string and converts it to snake case format. Parameters: Name Type Description Default s str The string to be converted. required Returns: Type Description str The string in snake case format. Source code in quinn/transformations.py def to_snake_case(s: str) -> str: \"\"\"Takes a string and converts it to snake case format. :param s: The string to be converted. :type s: str :return: The string in snake case format. :rtype: str \"\"\" return s.lower().replace(\" \", \"_\") with_columns_renamed(fun) Ffunction designed to rename the columns of a Spark DataFrame . It takes a Callable[[str], str] object as an argument ( fun ) and returns a Callable[[DataFrame], DataFrame] object. When _() is called on a DataFrame , it creates a list of column names, applying the argument fun() to each of them, and returning a new DataFrame with the new column names. Parameters: Name Type Description Default fun Callable [[ str ], str ] Renaming function required Returns: Type Description Callable [[ DataFrame ], DataFrame ] Function which takes DataFrame as parameter. 
Source code in quinn/transformations.py def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]: \"\"\"Ffunction designed to rename the columns of a `Spark DataFrame`. It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a `Callable[[DataFrame], DataFrame]` object. When `_()` is called on a `DataFrame`, it creates a list of column names, applying the argument `fun()` to each of them, and returning a new `DataFrame` with the new column names. :param fun: Renaming function :returns: Function which takes DataFrame as parameter. \"\"\" def _(df: DataFrame) -> DataFrame: cols = [F.col(f\"`{col_name}`\").alias(fun(col_name)) for col_name in df.columns] return df.select(*cols) return _ with_some_columns_renamed(fun, change_col_name) Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame] . Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed. Parameters: Name Type Description Default fun Callable [[ str ], str ] A function that takes a column name as a string and returns a new name as a string. required change_col_name Callable [[ str ], str ] A function that takes a column name as a string and returns a boolean. required Returns: Type Description `Callable[[DataFrame], DataFrame]` A Callable[[DataFrame], DataFrame] , which takes a DataFrame and returns a DataFrame with some of its columns renamed. Source code in quinn/transformations.py def with_some_columns_renamed( fun: Callable[[str], str], change_col_name: Callable[[str], str], ) -> Callable[[DataFrame], DataFrame]: \"\"\"Function that takes a `Callable[[str], str]` and a `Callable[[str], str]` and returns a `Callable[[DataFrame], DataFrame]`. Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :param fun: A function that takes a column name as a string and returns a new name as a string. 
:type fun: `Callable[[str], str]` :param change_col_name: A function that takes a column name as a string and returns a boolean. :type change_col_name: `Callable[[str], str]` :return: A `Callable[[DataFrame], DataFrame]`, which takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :rtype: `Callable[[DataFrame], DataFrame]` \"\"\" def _(df: DataFrame) -> DataFrame: cols = [ F.col(f\"`{col_name}`\").alias(fun(col_name)) if change_col_name(col_name) else F.col(f\"`{col_name}`\") for col_name in df.columns ] return df.select(*cols) return _","title":"Transformations"},{"location":"reference/quinn/transformations/#quinn.transformations.flatten_dataframe","text":"Flattens the complex columns in the DataFrame. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required separator str The separator to use in the resulting flattened column names, defaults to \":\". ':' replace_char str The character to replace special characters with in column names, defaults to \"_\". '_' sanitized_columns bool Whether to sanitize column names, defaults to False. False Returns: Type Description DataFrame .. note:: This function assumes the input DataFrame has a consistent schema across all rows. If you have files with different schemas, process each separately instead. .. example:: Example usage: >>> data = [ ( 1, (\"Alice\", 25), {\"A\": 100, \"B\": 200}, [\"apple\", \"banana\"], {\"key\": {\"nested_key\": 10}}, {\"A#\": 1000, \"B@\": 2000}, ), ( 2, (\"Bob\", 30), {\"A\": 150, \"B\": 250}, [\"orange\", \"grape\"], {\"key\": {\"nested_key\": 20}}, {\"A#\": 1500, \"B@\": 2500}, ), ] >>> df = spark.createDataFrame(data) >>> flattened_df = flatten_dataframe(df) >>> flattened_df.show() >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char=\"-\") >>> flattened_df_with_hyphen.show() The DataFrame with all complex data types flattened. 
Source code in quinn/transformations.py def flatten_dataframe( df: DataFrame, separator: str = \":\", replace_char: str = \"_\", sanitized_columns: bool = False, ) -> DataFrame: \"\"\"Flattens the complex columns in the DataFrame. :param df: The input PySpark DataFrame. :type df: DataFrame :param separator: The separator to use in the resulting flattened column names, defaults to \":\". :type separator: str, optional :param replace_char: The character to replace special characters with in column names, defaults to \"_\". :type replace_char: str, optional :param sanitized_columns: Whether to sanitize column names, defaults to False. :type sanitized_columns: bool, optional :return: The DataFrame with all complex data types flattened. :rtype: DataFrame .. note:: This function assumes the input DataFrame has a consistent schema across all rows. If you have files with different schemas, process each separately instead. .. example:: Example usage: >>> data = [ ( 1, (\"Alice\", 25), {\"A\": 100, \"B\": 200}, [\"apple\", \"banana\"], {\"key\": {\"nested_key\": 10}}, {\"A#\": 1000, \"B@\": 2000}, ), ( 2, (\"Bob\", 30), {\"A\": 150, \"B\": 250}, [\"orange\", \"grape\"], {\"key\": {\"nested_key\": 20}}, {\"A#\": 1500, \"B@\": 2500}, ), ] >>> df = spark.createDataFrame(data) >>> flattened_df = flatten_dataframe(df) >>> flattened_df.show() >>> flattened_df_with_hyphen = flatten_dataframe(df, replace_char=\"-\") >>> flattened_df_with_hyphen.show() \"\"\" def sanitize_column_name(name: str, rc: str = \"_\") -> str: \"\"\"Sanitizes column names by replacing special characters with the specified character. :param name: The original column name. :type name: str :param rc: The character to replace special characters with, defaults to '_'. :type rc: str, optional :return: The sanitized column name. 
:rtype: str \"\"\" return re.sub(r\"[^a-zA-Z0-9_]\", rc, name) def explode_array(df: DataFrame, col_name: str) -> DataFrame: \"\"\"Explodes the specified ArrayType column in the input DataFrame and returns a new DataFrame with the exploded column. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the ArrayType to be exploded. :type col_name: str :return: The DataFrame with the exploded ArrayType column. :rtype: DataFrame \"\"\" return df.select( \"*\", F.explode_outer(F.col(f\"`{col_name}`\")).alias(col_name), ).drop( col_name, ) fields = complex_fields(df.schema) while len(fields) != 0: col_name = next(iter(fields.keys())) if isinstance(fields[col_name], StructType): df = flatten_struct(df, col_name, separator) # noqa: PD901 elif isinstance(fields[col_name], ArrayType): df = explode_array(df, col_name) # noqa: PD901 elif isinstance(fields[col_name], MapType): df = flatten_map(df, col_name, separator) # noqa: PD901 fields = complex_fields(df.schema) # Sanitize column names with the specified replace_char if sanitized_columns: sanitized_columns = [ sanitize_column_name(col_name, replace_char) for col_name in df.columns ] df = df.toDF(*sanitized_columns) # noqa: PD901 return df","title":"flatten_dataframe()"},{"location":"reference/quinn/transformations/#quinn.transformations.flatten_map","text":"Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required col_name str The column name of the MapType to be flattened. required separator str The separator to use in the resulting flattened column names, defaults to \":\". ':' Returns: Type Description DataFrame The DataFrame with the flattened MapType column. 
Source code in quinn/transformations.py def flatten_map(df: DataFrame, col_name: str, separator: str = \":\") -> DataFrame: \"\"\"Flattens the specified MapType column in the input DataFrame and returns a new DataFrame with the flattened columns. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the MapType to be flattened. :type col_name: str :param separator: The separator to use in the resulting flattened column names, defaults to \":\". :type separator: str, optional :return: The DataFrame with the flattened MapType column. :rtype: DataFrame \"\"\" keys_df = df.select(F.explode_outer(F.map_keys(F.col(f\"`{col_name}`\")))).distinct() keys = [row[0] for row in keys_df.collect()] key_cols = [ F.col(f\"`{col_name}`\").getItem(k).alias(col_name + separator + k) for k in keys ] return df.select( [F.col(f\"`{col}`\") for col in df.columns if col != col_name] + key_cols, )","title":"flatten_map()"},{"location":"reference/quinn/transformations/#quinn.transformations.flatten_struct","text":"Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns. Parameters: Name Type Description Default df DataFrame The input PySpark DataFrame. required col_name str The column name of the StructType to be flattened. required separator str The separator to use in the resulting flattened column names, defaults to ':'. ':' Returns: Type Description List[Column] The DataFrame with the flattened StructType column. Source code in quinn/transformations.py def flatten_struct(df: DataFrame, col_name: str, separator: str = \":\") -> DataFrame: \"\"\"Flattens the specified StructType column in the input DataFrame and returns a new DataFrame with the flattened columns. :param df: The input PySpark DataFrame. :type df: DataFrame :param col_name: The column name of the StructType to be flattened. 
:type col_name: str :param separator: The separator to use in the resulting flattened column names, defaults to ':'. :type separator: str, optional :return: The DataFrame with the flattened StructType column. :rtype: List[Column] \"\"\" struct_type = complex_fields(df.schema)[col_name] expanded = [ F.col(f\"`{col_name}`.`{k}`\").alias(col_name + separator + k) for k in [n.name for n in struct_type.fields] ] return df.select(\"*\", *expanded).drop(F.col(f\"`{col_name}`\"))","title":"flatten_struct()"},{"location":"reference/quinn/transformations/#quinn.transformations.snake_case_col_names","text":"Function takes a DataFrame instance and returns the same DataFrame instance with all column names converted to snake case. (e.g. col_name_1 ). It uses the to_snake_case function in conjunction with the with_columns_renamed function to achieve this. Parameters: Name Type Description Default df DataFrame A DataFrame instance to process required Returns: Type Description ``DataFrame``. A DataFrame instance with column names converted to snake case Source code in quinn/transformations.py def snake_case_col_names(df: DataFrame) -> DataFrame: \"\"\"Function takes a ``DataFrame`` instance and returns the same ``DataFrame`` instance with all column names converted to snake case. (e.g. ``col_name_1``). It uses the ``to_snake_case`` function in conjunction with the ``with_columns_renamed`` function to achieve this. :param df: A ``DataFrame`` instance to process :type df: ``DataFrame`` :return: A ``DataFrame`` instance with column names converted to snake case :rtype: ``DataFrame``. \"\"\" return with_columns_renamed(to_snake_case)(df)","title":"snake_case_col_names()"},{"location":"reference/quinn/transformations/#quinn.transformations.sort_columns","text":"This function sorts the columns of a given DataFrame based on a given sort order. The sort_order parameter can either be asc or desc , which correspond to ascending and descending order, respectively. 
If any other value is provided for the sort_order parameter, a ValueError will be raised. Parameters: Name Type Description Default df DataFrame A DataFrame required sort_order str The order in which to sort the columns in the DataFrame required sort_nested bool Whether to sort nested structs or not. Defaults to false. False Returns: Type Description pyspark.sql.DataFrame A DataFrame with the columns sorted in the chosen order Source code in quinn/transformations.py def sort_columns( # noqa: C901,PLR0915 df: DataFrame, sort_order: str, sort_nested: bool = False, ) -> DataFrame: \"\"\"This function sorts the columns of a given DataFrame based on a given sort order. The ``sort_order`` parameter can either be ``asc`` or ``desc``, which correspond to ascending and descending order, respectively. If any other value is provided for the ``sort_order`` parameter, a ``ValueError`` will be raised. :param df: A DataFrame :type df: pyspark.sql.DataFrame :param sort_order: The order in which to sort the columns in the DataFrame :type sort_order: str :param sort_nested: Whether to sort nested structs or not. Defaults to false. 
:type sort_nested: bool :return: A DataFrame with the columns sorted in the chosen order :rtype: pyspark.sql.DataFrame \"\"\" def sort_nested_cols(schema, is_reversed, base_field=\"\") -> list[str]: # noqa: ANN001 # recursively check nested fields and sort them # https://stackoverflow.com/questions/57821538/how-to-sort-columns-of-nested-structs-alphabetically-in-pyspark # Credits: @pault for logic def parse_fields( fields_to_sort: list, parent_struct, is_reversed: bool, # noqa: ANN001 ) -> list: sorted_fields: list = sorted( fields_to_sort, key=lambda x: x[\"name\"], reverse=is_reversed, ) results = [] for field in sorted_fields: new_struct = StructType([StructField.fromJson(field)]) new_base_field = parent_struct.name if base_field: new_base_field = base_field + \".\" + new_base_field results.extend( sort_nested_cols(new_struct, is_reversed, base_field=new_base_field), ) return results select_cols = [] for parent_struct in sorted(schema, key=lambda x: x.name, reverse=is_reversed): field_type = parent_struct.dataType if isinstance(field_type, ArrayType): array_parent = parent_struct.jsonValue()[\"type\"][\"elementType\"] base_str = f\"transform({parent_struct.name}\" suffix_str = f\") AS {parent_struct.name}\" # if struct in array, create mapping to struct if array_parent[\"type\"] == \"struct\": array_parent = array_parent[\"fields\"] base_str = f\"{base_str}, x -> struct(\" suffix_str = f\"){suffix_str}\" array_elements = parse_fields(array_parent, parent_struct, is_reversed) element_names = [i.split(\".\")[-1] for i in array_elements] array_elements_formatted = [f\"x.{i} as {i}\" for i in element_names] # create a string representation of the sorted array # ex: transform(phone_numbers, x -> struct(x.number as number, x.type as type)) AS phone_numbers result = f\"{base_str}{', '.join(array_elements_formatted)}{suffix_str}\" elif isinstance(field_type, StructType): field_list = parent_struct.jsonValue()[\"type\"][\"fields\"] sub_fields = parse_fields(field_list, 
parent_struct, is_reversed) # create a string representation of the sorted struct # ex: struct(address.zip.first5, address.zip.last4) AS zip result = f\"struct({', '.join(sub_fields)}) AS {parent_struct.name}\" elif base_field: result = f\"{base_field}.{parent_struct.name}\" else: result = parent_struct.name select_cols.append(result) return select_cols def get_original_nullability(field: StructField, result_dict: dict) -> None: if hasattr(field, \"nullable\"): result_dict[field.name] = field.nullable else: result_dict[field.name] = True if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): result_dict[f\"{field.name}_element\"] = field.dataType.containsNull children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: get_original_nullability(i, result_dict) def fix_nullability(field: StructField, result_dict: dict) -> None: field.nullable = result_dict[field.name] if not isinstance(field.dataType, StructType) and not isinstance( field.dataType, ArrayType, ): return if isinstance(field.dataType, ArrayType): # save the containsNull property of the ArrayType field.dataType.containsNull = result_dict[f\"{field.name}_element\"] children = field.dataType.elementType.fields else: children = field.dataType.fields for i in children: fix_nullability(i, result_dict) if sort_order not in [\"asc\", \"desc\"]: msg = f\"['asc', 'desc'] are the only valid sort orders and you entered a sort order of '{sort_order}'\" raise ValueError( msg, ) reverse_lookup = { \"asc\": False, \"desc\": True, } is_reversed: bool = reverse_lookup[sort_order] top_level_sorted_df = df.select(*sorted(df.columns, reverse=is_reversed)) if not sort_nested: return top_level_sorted_df is_nested: bool = any( isinstance(i.dataType, (StructType, ArrayType)) for i in top_level_sorted_df.schema ) if not is_nested: return top_level_sorted_df fully_sorted_schema = 
sort_nested_cols(top_level_sorted_df.schema, is_reversed) output = df.selectExpr(fully_sorted_schema) result_dict = {} for field in df.schema: get_original_nullability(field, result_dict) for field in output.schema: fix_nullability(field, result_dict) if not hasattr(SparkSession, \"getActiveSession\"): # spark 2.4 spark = SparkSession.builder.getOrCreate() else: spark = SparkSession.getActiveSession() spark = spark if spark is not None else SparkSession.builder.getOrCreate() return spark.createDataFrame(output.rdd, output.schema)","title":"sort_columns()"},{"location":"reference/quinn/transformations/#quinn.transformations.to_snake_case","text":"Takes a string and converts it to snake case format. Parameters: Name Type Description Default s str The string to be converted. required Returns: Type Description str The string in snake case format. Source code in quinn/transformations.py def to_snake_case(s: str) -> str: \"\"\"Takes a string and converts it to snake case format. :param s: The string to be converted. :type s: str :return: The string in snake case format. :rtype: str \"\"\" return s.lower().replace(\" \", \"_\")","title":"to_snake_case()"},{"location":"reference/quinn/transformations/#quinn.transformations.with_columns_renamed","text":"Ffunction designed to rename the columns of a Spark DataFrame . It takes a Callable[[str], str] object as an argument ( fun ) and returns a Callable[[DataFrame], DataFrame] object. When _() is called on a DataFrame , it creates a list of column names, applying the argument fun() to each of them, and returning a new DataFrame with the new column names. Parameters: Name Type Description Default fun Callable [[ str ], str ] Renaming function required Returns: Type Description Callable [[ DataFrame ], DataFrame ] Function which takes DataFrame as parameter. 
Source code in quinn/transformations.py def with_columns_renamed(fun: Callable[[str], str]) -> Callable[[DataFrame], DataFrame]: \"\"\"Ffunction designed to rename the columns of a `Spark DataFrame`. It takes a `Callable[[str], str]` object as an argument (``fun``) and returns a `Callable[[DataFrame], DataFrame]` object. When `_()` is called on a `DataFrame`, it creates a list of column names, applying the argument `fun()` to each of them, and returning a new `DataFrame` with the new column names. :param fun: Renaming function :returns: Function which takes DataFrame as parameter. \"\"\" def _(df: DataFrame) -> DataFrame: cols = [F.col(f\"`{col_name}`\").alias(fun(col_name)) for col_name in df.columns] return df.select(*cols) return _","title":"with_columns_renamed()"},{"location":"reference/quinn/transformations/#quinn.transformations.with_some_columns_renamed","text":"Function that takes a Callable[[str], str] and a Callable[[str], str] and returns a Callable[[DataFrame], DataFrame] . Which in turn takes a DataFrame and returns a DataFrame with some of its columns renamed. Parameters: Name Type Description Default fun Callable [[ str ], str ] A function that takes a column name as a string and returns a new name as a string. required change_col_name Callable [[ str ], str ] A function that takes a column name as a string and returns a boolean. required Returns: Type Description `Callable[[DataFrame], DataFrame]` A Callable[[DataFrame], DataFrame] , which takes a DataFrame and returns a DataFrame with some of its columns renamed. Source code in quinn/transformations.py def with_some_columns_renamed( fun: Callable[[str], str], change_col_name: Callable[[str], str], ) -> Callable[[DataFrame], DataFrame]: \"\"\"Function that takes a `Callable[[str], str]` and a `Callable[[str], str]` and returns a `Callable[[DataFrame], DataFrame]`. Which in turn takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. 
:param fun: A function that takes a column name as a string and returns a new name as a string. :type fun: `Callable[[str], str]` :param change_col_name: A function that takes a column name as a string and returns a boolean. :type change_col_name: `Callable[[str], str]` :return: A `Callable[[DataFrame], DataFrame]`, which takes a `DataFrame` and returns a `DataFrame` with some of its columns renamed. :rtype: `Callable[[DataFrame], DataFrame]` \"\"\" def _(df: DataFrame) -> DataFrame: cols = [ F.col(f\"`{col_name}`\").alias(fun(col_name)) if change_col_name(col_name) else F.col(f\"`{col_name}`\") for col_name in df.columns ] return df.select(*cols) return _","title":"with_some_columns_renamed()"},{"location":"reference/quinn/extensions/","text":"Extensions API.","title":"Index"},{"location":"reference/quinn/extensions/column_ext/","text":"isFalse(self) Function checks if the column is equal to False and returns the column. Parameters: Name Type Description Default self Column Column required Returns: Type Description Column Column Source code in quinn/extensions/column_ext.py def isFalse(self: Column) -> Column: \"\"\"Function checks if the column is equal to False and returns the column. :param self: Column :return: Column :rtype: Column \"\"\" return self == lit(False) isFalsy(self) Returns a Column indicating whether all values in the Column are False or NULL ( falsy ). Each element in the resulting column is True if all the elements in the Column are either NULL or False, or False otherwise. This is accomplished by performing a bitwise or of the isNull condition and a literal False value and then wrapping the result in a when statement. Parameters: Name Type Description Default self Column Column object required Returns: Type Description Column Column object Source code in quinn/extensions/column_ext.py def isFalsy(self: Column) -> Column: \"\"\"Returns a Column indicating whether all values in the Column are False or NULL (**falsy**). 
Each element in the resulting column is True if all the elements in the Column are either NULL or False, or False otherwise. This is accomplished by performing a bitwise or of the ``isNull`` condition and a literal False value and then wrapping the result in a **when** statement. :param self: Column object :returns: Column object :rtype: Column \"\"\" return when(self.isNull() | (self == lit(False)), True).otherwise(False) isNotIn(self, _list) To see if a value is not in a list of values. :_list: list[Any] Parameters: Name Type Description Default self Column Column object required Source code in quinn/extensions/column_ext.py def isNotIn(self: Column, _list: list[Any]) -> Column: \"\"\"To see if a value is not in a list of values. :param self: Column object :_list: list[Any] :rtype: Column \"\"\" return ~(self.isin(_list)) isNullOrBlank(self) Returns a Boolean value which expresses whether a given column is null or contains only blank characters. Parameters: Name Type Description Default \\*\\*self The :class: Column to check. required Returns: Type Description Column A Column containing True if the column is null or only contains blank characters, or False otherwise. Source code in quinn/extensions/column_ext.py def isNullOrBlank(self: Column) -> Column: r\"\"\"Returns a Boolean value which expresses whether a given column is ``null`` or contains only blank characters. :param \\*\\*self: The :class:`Column` to check. :returns: A `Column` containing ``True`` if the column is ``null`` or only contains blank characters, or ``False`` otherwise. :rtype: Column \"\"\" return (self.isNull()) | (trim(self) == \"\") isTrue(self) Function takes a column of type Column as an argument and returns a column of type Column. It evaluates whether each element in the column argument is equal to True, and if so will return True, otherwise False. 
Parameters: Name Type Description Default self Column Column object required Returns: Type Description Column Column object Source code in quinn/extensions/column_ext.py def isTrue(self: Column) -> Column: \"\"\"Function takes a column of type Column as an argument and returns a column of type Column. It evaluates whether each element in the column argument is equal to True, and if so will return True, otherwise False. :param self: Column object :returns: Column object :rtype: Column \"\"\" return self == lit(True) isTruthy(self) Calculates a boolean expression that is the opposite of isFalsy for the given Column self. Parameters: Name Type Description Default self Column The Column to calculate the opposite of isFalsy for. required Returns: Type Description Column A Column with the results of the calculation. Source code in quinn/extensions/column_ext.py def isTruthy(self: Column) -> Column: \"\"\"Calculates a boolean expression that is the opposite of isFalsy for the given ``Column`` self. :param Column self: The ``Column`` to calculate the opposite of isFalsy for. :returns: A ``Column`` with the results of the calculation. :rtype: Column \"\"\" return ~(self.isFalsy()) nullBetween(self, lower, upper) To see if a value is between two values in a null friendly way. :lower: Column :upper: Column Parameters: Name Type Description Default self Column Column object required Source code in quinn/extensions/column_ext.py def nullBetween(self: Column, lower: Column, upper: Column) -> Column: \"\"\"To see if a value is between two values in a null friendly way. 
:param self: Column object :lower: Column :upper: Column :rtype: Column \"\"\" return when(lower.isNull() & upper.isNull(), False).otherwise( when(self.isNull(), False).otherwise( when(lower.isNull() & upper.isNotNull() & (self <= upper), True).otherwise( when( lower.isNotNull() & upper.isNull() & (self >= lower), True, ).otherwise(self.between(lower, upper)), ), ), )","title":"Column ext"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isFalse","text":"Function checks if the column is equal to False and returns the column. Parameters: Name Type Description Default self Column Column required Returns: Type Description Column Column Source code in quinn/extensions/column_ext.py def isFalse(self: Column) -> Column: \"\"\"Function checks if the column is equal to False and returns the column. :param self: Column :return: Column :rtype: Column \"\"\" return self == lit(False)","title":"isFalse()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isFalsy","text":"Returns a Column indicating whether all values in the Column are False or NULL ( falsy ). Each element in the resulting column is True if all the elements in the Column are either NULL or False, or False otherwise. This is accomplished by performing a bitwise or of the isNull condition and a literal False value and then wrapping the result in a when statement. Parameters: Name Type Description Default self Column Column object required Returns: Type Description Column Column object Source code in quinn/extensions/column_ext.py def isFalsy(self: Column) -> Column: \"\"\"Returns a Column indicating whether all values in the Column are False or NULL (**falsy**). Each element in the resulting column is True if all the elements in the Column are either NULL or False, or False otherwise. This is accomplished by performing a bitwise or of the ``isNull`` condition and a literal False value and then wrapping the result in a **when** statement. 
:param self: Column object :returns: Column object :rtype: Column \"\"\" return when(self.isNull() | (self == lit(False)), True).otherwise(False)","title":"isFalsy()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isNotIn","text":"To see if a value is not in a list of values. :_list: list[Any] Parameters: Name Type Description Default self Column Column object required Source code in quinn/extensions/column_ext.py def isNotIn(self: Column, _list: list[Any]) -> Column: \"\"\"To see if a value is not in a list of values. :param self: Column object :_list: list[Any] :rtype: Column \"\"\" return ~(self.isin(_list))","title":"isNotIn()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isNullOrBlank","text":"Returns a Boolean value which expresses whether a given column is null or contains only blank characters. Parameters: Name Type Description Default \\*\\*self The :class: Column to check. required Returns: Type Description Column A Column containing True if the column is null or only contains blank characters, or False otherwise. Source code in quinn/extensions/column_ext.py def isNullOrBlank(self: Column) -> Column: r\"\"\"Returns a Boolean value which expresses whether a given column is ``null`` or contains only blank characters. :param \\*\\*self: The :class:`Column` to check. :returns: A `Column` containing ``True`` if the column is ``null`` or only contains blank characters, or ``False`` otherwise. :rtype: Column \"\"\" return (self.isNull()) | (trim(self) == \"\")","title":"isNullOrBlank()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isTrue","text":"Function takes a column of type Column as an argument and returns a column of type Column. It evaluates whether each element in the column argument is equal to True, and if so will return True, otherwise False. 
Parameters: Name Type Description Default self Column Column object required Returns: Type Description Column Column object Source code in quinn/extensions/column_ext.py def isTrue(self: Column) -> Column: \"\"\"Function takes a column of type Column as an argument and returns a column of type Column. It evaluates whether each element in the column argument is equal to True, and if so will return True, otherwise False. :param self: Column object :returns: Column object :rtype: Column \"\"\" return self == lit(True)","title":"isTrue()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.isTruthy","text":"Calculates a boolean expression that is the opposite of isFalsy for the given Column self. Parameters: Name Type Description Default self Column The Column to calculate the opposite of isFalsy for. required Returns: Type Description Column A Column with the results of the calculation. Source code in quinn/extensions/column_ext.py def isTruthy(self: Column) -> Column: \"\"\"Calculates a boolean expression that is the opposite of isFalsy for the given ``Column`` self. :param Column self: The ``Column`` to calculate the opposite of isFalsy for. :returns: A ``Column`` with the results of the calculation. :rtype: Column \"\"\" return ~(self.isFalsy())","title":"isTruthy()"},{"location":"reference/quinn/extensions/column_ext/#quinn.extensions.column_ext.nullBetween","text":"To see if a value is between two values in a null friendly way. :lower: Column :upper: Column Parameters: Name Type Description Default self Column Column object required Source code in quinn/extensions/column_ext.py def nullBetween(self: Column, lower: Column, upper: Column) -> Column: \"\"\"To see if a value is between two values in a null friendly way. 
:param self: Column object :lower: Column :upper: Column :rtype: Column \"\"\" return when(lower.isNull() & upper.isNull(), False).otherwise( when(self.isNull(), False).otherwise( when(lower.isNull() & upper.isNotNull() & (self <= upper), True).otherwise( when( lower.isNotNull() & upper.isNull() & (self >= lower), True, ).otherwise(self.between(lower, upper)), ), ), )","title":"nullBetween()"},{"location":"reference/quinn/extensions/dataframe_ext/","text":"","title":"Dataframe ext"},{"location":"reference/quinn/extensions/spark_session_ext/","text":"create_df(self, rows_data, col_specs) Creates a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default rows_data array-like the data used to create the DataFrame required col_specs list [ tuple ] list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/extensions/spark_session_ext.py def create_df(self: Self, rows_data, col_specs: list[tuple]) -> DataFrame: # noqa: ANN001 \"\"\"Creates a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" warnings.warn( \"Extensions may be removed in the future versions of quinn. 
Please use `quinn.create_df()` instead\", category=DeprecationWarning, stacklevel=2, ) struct_fields = [StructField(*x) for x in col_specs] return self.createDataFrame(data=rows_data, schema=StructType(struct_fields))","title":"Spark session ext"},{"location":"reference/quinn/extensions/spark_session_ext/#quinn.extensions.spark_session_ext.create_df","text":"Creates a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. Parameters: Name Type Description Default rows_data array-like the data used to create the DataFrame required col_specs list [ tuple ] list of tuples containing the name and type of the field required Returns: Type Description DataFrame a new DataFrame Source code in quinn/extensions/spark_session_ext.py def create_df(self: Self, rows_data, col_specs: list[tuple]) -> DataFrame: # noqa: ANN001 \"\"\"Creates a new DataFrame from the given data and column specs. The returned DataFrame is created using the StructType and StructField classes provided by PySpark. :param rows_data: the data used to create the DataFrame :type rows_data: array-like :param col_specs: list of tuples containing the name and type of the field :type col_specs: list of tuples :return: a new DataFrame :rtype: DataFrame \"\"\" warnings.warn( \"Extensions may be removed in the future versions of quinn. Please use `quinn.create_df()` instead\", category=DeprecationWarning, stacklevel=2, ) struct_fields = [StructField(*x) for x in col_specs] return self.createDataFrame(data=rows_data, schema=StructType(struct_fields))","title":"create_df()"}]} \ No newline at end of file diff --git a/search/worker.js b/search/worker.js new file mode 100644 index 00000000..8628dbce --- /dev/null +++ b/search/worker.js @@ -0,0 +1,133 @@ +var base_path = 'function' === typeof importScripts ? '.' 
: '/search/'; +var allowSearch = false; +var index; +var documents = {}; +var lang = ['en']; +var data; + +function getScript(script, callback) { + console.log('Loading script: ' + script); + $.getScript(base_path + script).done(function () { + callback(); + }).fail(function (jqxhr, settings, exception) { + console.log('Error: ' + exception); + }); +} + +function getScriptsInOrder(scripts, callback) { + if (scripts.length === 0) { + callback(); + return; + } + getScript(scripts[0], function() { + getScriptsInOrder(scripts.slice(1), callback); + }); +} + +function loadScripts(urls, callback) { + if( 'function' === typeof importScripts ) { + importScripts.apply(null, urls); + callback(); + } else { + getScriptsInOrder(urls, callback); + } +} + +function onJSONLoaded () { + data = JSON.parse(this.responseText); + var scriptsToLoad = ['lunr.js']; + if (data.config && data.config.lang && data.config.lang.length) { + lang = data.config.lang; + } + if (lang.length > 1 || lang[0] !== "en") { + scriptsToLoad.push('lunr.stemmer.support.js'); + if (lang.length > 1) { + scriptsToLoad.push('lunr.multi.js'); + } + if (lang.includes("ja") || lang.includes("jp")) { + scriptsToLoad.push('tinyseg.js'); + } + for (var i=0; i < lang.length; i++) { + if (lang[i] != 'en') { + scriptsToLoad.push(['lunr', lang[i], 'js'].join('.')); + } + } + } + loadScripts(scriptsToLoad, onScriptsLoaded); +} + +function onScriptsLoaded () { + console.log('All search scripts loaded, building Lunr index...'); + if (data.config && data.config.separator && data.config.separator.length) { + lunr.tokenizer.separator = new RegExp(data.config.separator); + } + + if (data.index) { + index = lunr.Index.load(data.index); + data.docs.forEach(function (doc) { + documents[doc.location] = doc; + }); + console.log('Lunr pre-built index loaded, search ready'); + } else { + index = lunr(function () { + if (lang.length === 1 && lang[0] !== "en" && lunr[lang[0]]) { + this.use(lunr[lang[0]]); + } else if (lang.length > 1) { 
+ this.use(lunr.multiLanguage.apply(null, lang)); // spread operator not supported in all browsers: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Spread_operator#Browser_compatibility + } + this.field('title'); + this.field('text'); + this.ref('location'); + + for (var i=0; i < data.docs.length; i++) { + var doc = data.docs[i]; + this.add(doc); + documents[doc.location] = doc; + } + }); + console.log('Lunr index built, search ready'); + } + allowSearch = true; + postMessage({config: data.config}); + postMessage({allowSearch: allowSearch}); +} + +function init () { + var oReq = new XMLHttpRequest(); + oReq.addEventListener("load", onJSONLoaded); + var index_path = base_path + '/search_index.json'; + if( 'function' === typeof importScripts ){ + index_path = 'search_index.json'; + } + oReq.open("GET", index_path); + oReq.send(); +} + +function search (query) { + if (!allowSearch) { + console.error('Assets for search still loading'); + return; + } + + var resultDocuments = []; + var results = index.search(query); + for (var i=0; i < results.length; i++){ + var result = results[i]; + doc = documents[result.ref]; + doc.summary = doc.text.substring(0, 200); + resultDocuments.push(doc); + } + return resultDocuments; +} + +if( 'function' === typeof importScripts ) { + onmessage = function (e) { + if (e.data.init) { + init(); + } else if (e.data.query) { + postMessage({ results: search(e.data.query) }); + } else { + console.error("Worker - Unrecognized message: " + e); + } + }; +} diff --git a/sitemap.xml b/sitemap.xml new file mode 100644 index 00000000..0f8724ef --- /dev/null +++ b/sitemap.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz new file mode 100644 index 00000000..2d3578ea Binary files /dev/null and b/sitemap.xml.gz differ